From 1b96f28e41ecb466a42e5a86c6bcdd97c510e0bb Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Sun, 25 Jan 2004 21:30:41 +0000 Subject: [PATCH] Refactored statistical aggregates to separate stored, storeless implementations. Changed internal sample size counters to longs. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141064 13f79535-47bb-0310-9956-ffa450edef68 --- .../math/random/EmpiricalDistribution.java | 6 +- .../random/EmpiricalDistributionImpl.java | 20 +- .../commons/math/random/ValueServer.java | 3 +- .../stat/AbstractDescriptiveStatistics.java | 155 ++++++++++-- .../math/stat/DescriptiveStatistics.java | 15 +- .../math/stat/DescriptiveStatisticsImpl.java | 50 +++- .../commons/math/stat/StatisticalSummary.java | 100 ++++++++ .../StorelessDescriptiveStatisticsImpl.java | 208 ---------------- .../commons/math/stat/SummaryStatistics.java | 170 +++++++++++++ ...istics.java => SummaryStatisticsImpl.java} | 226 ++++++------------ .../commons/math/stat/TestStatistic.java | 28 +-- .../commons/math/stat/TestStatisticImpl.java | 40 ++-- .../stat/univariate/moment/FirstMoment.java | 8 +- .../stat/univariate/moment/GeometricMean.java | 6 +- .../math/stat/univariate/moment/Kurtosis.java | 6 +- .../math/stat/univariate/moment/Skewness.java | 6 +- .../math/stat/univariate/moment/Variance.java | 10 +- .../math/stat/univariate/rank/Max.java | 6 +- .../math/stat/univariate/rank/Min.java | 6 +- .../random/EmpiricalDistributionTest.java | 7 +- .../commons/math/random/RandomDataTest.java | 7 +- .../commons/math/random/ValueServerTest.java | 9 +- .../commons/math/stat/CertifiedDataTest.java | 59 +++-- .../commons/math/stat/ListUnivariateImpl.java | 27 ++- .../math/stat/ListUnivariateImplTest.java | 4 +- .../math/stat/StoreUnivariateImplTest.java | 4 +- .../commons/math/stat/TestStatisticTest.java | 94 ++------ ...relessUnivariateStatisticAbstractTest.java | 4 +- .../stat/univariate/UnivariateImplTest.java | 79 +----- 
.../UnivariateStatisticAbstractTest.java | 4 +- 30 files changed, 710 insertions(+), 657 deletions(-) create mode 100644 src/java/org/apache/commons/math/stat/StatisticalSummary.java delete mode 100644 src/java/org/apache/commons/math/stat/StorelessDescriptiveStatisticsImpl.java create mode 100644 src/java/org/apache/commons/math/stat/SummaryStatistics.java rename src/java/org/apache/commons/math/stat/{AbstractStorelessDescriptiveStatistics.java => SummaryStatisticsImpl.java} (51%) diff --git a/src/java/org/apache/commons/math/random/EmpiricalDistribution.java b/src/java/org/apache/commons/math/random/EmpiricalDistribution.java index 67b583fdb..05a8d2ce6 100644 --- a/src/java/org/apache/commons/math/random/EmpiricalDistribution.java +++ b/src/java/org/apache/commons/math/random/EmpiricalDistribution.java @@ -59,7 +59,7 @@ import java.io.File; import java.net.URL; import java.util.ArrayList; -import org.apache.commons.math.stat.DescriptiveStatistics; +import org.apache.commons.math.stat.SummaryStatistics; /** * Represents an @@ -81,7 +81,7 @@ import org.apache.commons.math.stat.DescriptiveStatistics; * build grouped frequnecy histograms representing the input data or to * generate random values "like" those in the input file -- i.e., the values * generated will follow the distribution of the values in the file. - * @version $Revision: 1.12 $ $Date: 2004/01/15 05:22:08 $ + * @version $Revision: 1.13 $ $Date: 2004/01/25 21:30:41 $ */ public interface EmpiricalDistribution { @@ -123,7 +123,7 @@ public interface EmpiricalDistribution { * @return the sample statistics * @throws IllegalStateException if the distribution has not been loaded */ - DescriptiveStatistics getSampleStats() throws IllegalStateException; + SummaryStatistics getSampleStats() throws IllegalStateException; /** * Loads a saved distribution from a file. 
diff --git a/src/java/org/apache/commons/math/random/EmpiricalDistributionImpl.java b/src/java/org/apache/commons/math/random/EmpiricalDistributionImpl.java index ba6ca4593..37e7b3f60 100644 --- a/src/java/org/apache/commons/math/random/EmpiricalDistributionImpl.java +++ b/src/java/org/apache/commons/math/random/EmpiricalDistributionImpl.java @@ -65,7 +65,7 @@ import java.io.InputStreamReader; import java.net.URL; import org.apache.commons.math.stat.DescriptiveStatistics; -import org.apache.commons.math.stat.StorelessDescriptiveStatisticsImpl; +import org.apache.commons.math.stat.SummaryStatistics; /** * Implements EmpiricalDistribution interface. This implementation @@ -92,7 +92,7 @@ import org.apache.commons.math.stat.StorelessDescriptiveStatisticsImpl; * entry per line. *

* - * @version $Revision: 1.13 $ $Date: 2004/01/15 05:22:08 $ + * @version $Revision: 1.14 $ $Date: 2004/01/25 21:30:41 $ */ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistribution { @@ -101,7 +101,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib private ArrayList binStats = null; /** Sample statistics */ - DescriptiveStatistics sampleStats = null; + SummaryStatistics sampleStats = null; /** number of bins */ private int binCount = 1000; @@ -175,7 +175,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib private void computeStats(BufferedReader in) throws IOException { String str = null; double val = 0.0; - sampleStats = new StorelessDescriptiveStatisticsImpl(); + sampleStats = SummaryStatistics.newInstance(); while ((str = in.readLine()) != null) { val = new Double(str).doubleValue(); sampleStats.addValue(val); @@ -205,7 +205,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib binStats.clear(); } for (int i = 0; i < binCount; i++) { - DescriptiveStatistics stats = new StorelessDescriptiveStatisticsImpl(); + SummaryStatistics stats = SummaryStatistics.newInstance(); binStats.add(i,stats); } @@ -224,7 +224,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib } if (val <= binUpperBounds[i]) { found = true; - DescriptiveStatistics stats = (DescriptiveStatistics)binStats.get(i); + SummaryStatistics stats = (SummaryStatistics)binStats.get(i); stats.addValue(val); } i++; @@ -236,11 +236,11 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib // Assign upperBounds based on bin counts upperBounds = new double[binCount]; upperBounds[0] = - ((double)((DescriptiveStatistics)binStats.get(0)).getN())/ + ((double)((SummaryStatistics)binStats.get(0)).getN())/ (double)sampleStats.getN(); for (int i = 1; i < binCount-1; i++) { upperBounds[i] = upperBounds[i-1] + - 
((double)((DescriptiveStatistics)binStats.get(i)).getN())/ + ((double)((SummaryStatistics)binStats.get(i)).getN())/ (double)sampleStats.getN(); } upperBounds[binCount-1] = 1.0d; @@ -263,7 +263,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { - DescriptiveStatistics stats = (DescriptiveStatistics)binStats.get(i); + SummaryStatistics stats = (SummaryStatistics)binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian @@ -295,7 +295,7 @@ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistrib throw new UnsupportedOperationException("Not Implemented yet :-("); } - public DescriptiveStatistics getSampleStats() { + public SummaryStatistics getSampleStats() { return sampleStats; } diff --git a/src/java/org/apache/commons/math/random/ValueServer.java b/src/java/org/apache/commons/math/random/ValueServer.java index a1095980d..ae1ce6252 100644 --- a/src/java/org/apache/commons/math/random/ValueServer.java +++ b/src/java/org/apache/commons/math/random/ValueServer.java @@ -78,7 +78,7 @@ import java.net.MalformedURLException; * standard deviation = sigma *
  • CONSTANT_MODE -- returns mu every time.
  • * - * @version $Revision: 1.10 $ $Date: 2004/01/15 05:22:08 $ + * @version $Revision: 1.11 $ $Date: 2004/01/25 21:30:41 $ * */ public class ValueServer implements Serializable { @@ -240,7 +240,6 @@ public class ValueServer implements Serializable { * Sets the valuesFileURL using a string URL representation * @param url String representation for new valuesFileURL. * @throws MalformedURLException if url is not well formed - * @deprecated use {@link #setValuesFileURL(URL)} to be removed before 0.1 release */ public void setValuesFileURL(String url) throws MalformedURLException { this.valuesFileURL = new URL(url); diff --git a/src/java/org/apache/commons/math/stat/AbstractDescriptiveStatistics.java b/src/java/org/apache/commons/math/stat/AbstractDescriptiveStatistics.java index aaa098f93..7944d6c9b 100644 --- a/src/java/org/apache/commons/math/stat/AbstractDescriptiveStatistics.java +++ b/src/java/org/apache/commons/math/stat/AbstractDescriptiveStatistics.java @@ -55,40 +55,157 @@ package org.apache.commons.math.stat; import java.util.Arrays; +import org.apache.commons.math.stat.univariate.moment.GeometricMean; +import org.apache.commons.math.stat.univariate.moment.Kurtosis; +import org.apache.commons.math.stat.univariate.moment.Mean; +import org.apache.commons.math.stat.univariate.moment.Skewness; +import org.apache.commons.math.stat.univariate.moment.Variance; +import org.apache.commons.math.stat.univariate.rank.Max; +import org.apache.commons.math.stat.univariate.rank.Min; import org.apache.commons.math.stat.univariate.rank.Percentile; +import org.apache.commons.math.stat.univariate.summary.Sum; +import org.apache.commons.math.stat.univariate.summary.SumOfSquares; +import org.apache.commons.math.stat.univariate.UnivariateStatistic; /** - * Extends {@link AbstractStorelessDescriptiveStatistics} to include univariate statistics - * that may require access to the full set of sample values. 
- * @version $Revision: 1.2 $ $Date: 2004/01/18 03:45:02 $ + * Abstract superclass for DescriptiveStatistics implementations. + * + * @version $Revision: 1.3 $ $Date: 2004/01/25 21:30:41 $ */ public abstract class AbstractDescriptiveStatistics - extends AbstractStorelessDescriptiveStatistics { - - /** Percentile */ - protected Percentile percentile = new Percentile(50); + extends DescriptiveStatistics { /** * Create an AbstractDescriptiveStatistics */ public AbstractDescriptiveStatistics() { - super(); } /** * Create an AbstractDescriptiveStatistics with a specific Window * @param window WindowSIze for stat calculation */ - public AbstractDescriptiveStatistics(int window) { - super(window); + public AbstractDescriptiveStatistics(int window) { + setWindowSize(window); } /** - * @see org.apache.commons.math.stat.DescriptiveStatistics#getPercentile(double) + * @see org.apache.commons.math.stat.DescriptiveStatistics#getSum() + */ + public double getSum() { + return apply(new Sum()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getSumsq() + */ + public double getSumsq() { + return apply(new SumOfSquares()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getMean() + */ + public double getMean() { + return apply(new Mean()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getStandardDeviation() + */ + public double getStandardDeviation() { + double stdDev = Double.NaN; + if (getN() > 0) { + if (getN() > 1) { + stdDev = Math.sqrt(getVariance()); + } else { + stdDev = 0.0; + } + } + return (stdDev); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getVariance() + */ + public double getVariance() { + return apply(new Variance()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getSkewness() + */ + public double getSkewness() { + return apply(new Skewness()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getKurtosis() + */ + 
public double getKurtosis() { + return apply(new Kurtosis()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getKurtosisClass() + */ + public int getKurtosisClass() { + int kClass = MESOKURTIC; + + double kurtosis = getKurtosis(); + if (kurtosis > 0) { + kClass = LEPTOKURTIC; + } else if (kurtosis < 0) { + kClass = PLATYKURTIC; + } + return (kClass); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getMax() + */ + public double getMax() { + return apply(new Max()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getMin() + */ + public double getMin() { + return apply(new Min()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getGeometricMean() + */ + public double getGeometricMean() { + return apply(new GeometricMean()); + } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getPercentile(double) */ public double getPercentile(double p) { - percentile.setPercentile(p); - return apply(percentile); + return apply(new Percentile(p)); + } + + /** + * Generates a text report displaying + * univariate statistics from values that + * have been added. 
+ * @return String with line feeds displaying statistics + */ + public String toString() { + StringBuffer outBuffer = new StringBuffer(); + outBuffer.append("UnivariateImpl:\n"); + outBuffer.append("n: " + getN() + "\n"); + outBuffer.append("min: " + getMin() + "\n"); + outBuffer.append("max: " + getMax() + "\n"); + outBuffer.append("mean: " + getMean() + "\n"); + outBuffer.append("std dev: " + getStandardDeviation() + "\n"); + outBuffer.append("skewness: " + getSkewness() + "\n"); + outBuffer.append("kurtosis: " + getKurtosis() + "\n"); + return outBuffer.toString(); } /** @@ -101,7 +218,7 @@ public abstract class AbstractDescriptiveStatistics } /** - * @see org.apache.commons.math.stat.Univariate#addValue(double) + * @see org.apache.commons.math.stat.DescriptiveStatistics#addValue(double) */ public abstract void addValue(double value); @@ -110,12 +227,14 @@ public abstract class AbstractDescriptiveStatistics */ public abstract double[] getValues(); - /** * @see org.apache.commons.math.stat.DescriptiveStatistics#getElement(int) */ public abstract double getElement(int index); - - - + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#apply(UnivariateStatistic) + */ + public abstract double apply(UnivariateStatistic stat); + } diff --git a/src/java/org/apache/commons/math/stat/DescriptiveStatistics.java b/src/java/org/apache/commons/math/stat/DescriptiveStatistics.java index f1e27204d..f7f8f6e7d 100644 --- a/src/java/org/apache/commons/math/stat/DescriptiveStatistics.java +++ b/src/java/org/apache/commons/math/stat/DescriptiveStatistics.java @@ -57,12 +57,14 @@ import java.io.Serializable; import org.apache.commons.discovery.tools.DiscoverClass; +import org.apache.commons.math.stat.univariate.UnivariateStatistic; + /** * Abstract factory class for univariate statistical summaries. 
* - * @version $Revision: 1.3 $ $Date: 2004/01/18 03:45:02 $ + * @version $Revision: 1.4 $ $Date: 2004/01/25 21:30:41 $ */ -public abstract class DescriptiveStatistics implements Serializable{ +public abstract class DescriptiveStatistics implements Serializable, StatisticalSummary { /** * Create an instance of a DescriptiveStatistics @@ -195,7 +197,7 @@ public abstract class DescriptiveStatistics implements Serializable{ * Returns the number of available values * @return The number of available values */ - public abstract int getN(); + public abstract long getN(); /** * Returns the sum of the values that have been added to Univariate. @@ -279,5 +281,12 @@ public abstract class DescriptiveStatistics implements Serializable{ * values */ public abstract double getPercentile(double p); + + /** + * Apply the given statistic to the data associated with this set of statistics. + * @param stat the statistic to apply + * @return the computed value of the statistic. + */ + public abstract double apply(UnivariateStatistic stat); } diff --git a/src/java/org/apache/commons/math/stat/DescriptiveStatisticsImpl.java b/src/java/org/apache/commons/math/stat/DescriptiveStatisticsImpl.java index a35a68889..431ca9ec1 100644 --- a/src/java/org/apache/commons/math/stat/DescriptiveStatisticsImpl.java +++ b/src/java/org/apache/commons/math/stat/DescriptiveStatisticsImpl.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,26 +55,44 @@ package org.apache.commons.math.stat; import java.io.Serializable; -import org.apache.commons.math.stat.univariate.*; +import java.util.Arrays; + +import org.apache.commons.math.stat.univariate.UnivariateStatistic; import org.apache.commons.math.util.ContractableDoubleArray; /** - * @version $Revision: 1.2 $ $Date: 2003/11/19 03:28:23 $ + * @version $Revision: 1.3 $ $Date: 2004/01/25 21:30:41 $ */ public class DescriptiveStatisticsImpl extends AbstractDescriptiveStatistics implements Serializable { - /** A contractable double array is used. memory is reclaimed when - * the storage of the array becomes too empty. + /** hold the window size **/ + protected int windowSize = INFINITE_WINDOW; + + /** + * Stored data values */ protected ContractableDoubleArray eDA; /** - * Construct a DescriptiveStatisticsImpl + * Construct a DescriptiveStatisticsImpl with infinite window */ public DescriptiveStatisticsImpl() { + super(); eDA = new ContractableDoubleArray(); } + + /** + * Construct a DescriptiveStatisticsImpl with finite window + */ + public DescriptiveStatisticsImpl(int window) { + super(window); + eDA = new ContractableDoubleArray(); + } + public int getWindowSize() { + return windowSize; + } + /** * @see org.apache.commons.math.stat.DescriptiveStatistics#getValues() */ @@ -89,6 +107,15 @@ public class DescriptiveStatisticsImpl extends AbstractDescriptiveStatistics imp eDA.getNumElements()); return copiedArray; } + + /** + * @see org.apache.commons.math.stat.DescriptiveStatistics#getSortedValues() + */ + public double[] getSortedValues() { + double[] sort = getValues(); + Arrays.sort(sort); + return sort; + } /** * @see org.apache.commons.math.stat.DescriptiveStatistics#getElement(int) @@ -98,14 +125,14 @@ public class DescriptiveStatisticsImpl extends AbstractDescriptiveStatistics imp } /** - * @see org.apache.commons.math.stat.Univariate#getN() + * @see 
org.apache.commons.math.stat.DescriptiveStatistics#getN() */ - public int getN() { + public long getN() { return eDA.getNumElements(); } /** - * @see org.apache.commons.math.stat.Univariate#addValue(double) + * @see org.apache.commons.math.stat.DescriptiveStatistics#addValue(double) */ public synchronized void addValue(double v) { if (windowSize != INFINITE_WINDOW) { @@ -125,15 +152,14 @@ public class DescriptiveStatisticsImpl extends AbstractDescriptiveStatistics imp } /** - * @see org.apache.commons.math.stat.Univariate#clear() + * @see org.apache.commons.math.stat.DescriptiveStatistics#clear() */ public synchronized void clear() { - super.clear(); eDA.clear(); } /** - * @see org.apache.commons.math.stat.Univariate#setWindowSize(int) + * @see org.apache.commons.math.stat.DescriptiveStatistics#setWindowSize(int) */ public synchronized void setWindowSize(int windowSize) { this.windowSize = windowSize; diff --git a/src/java/org/apache/commons/math/stat/StatisticalSummary.java b/src/java/org/apache/commons/math/stat/StatisticalSummary.java new file mode 100644 index 000000000..7b70d2eb3 --- /dev/null +++ b/src/java/org/apache/commons/math/stat/StatisticalSummary.java @@ -0,0 +1,100 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2004 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, if + * any, must include the following acknowledgement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgement may appear in the software itself, + * if and wherever such third-party acknowledgements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their name without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . 
+ */ +package org.apache.commons.math.stat; + +/** + * Reporting interface for basic univariate statistics. + * + * @version $Revision: 1.1 $ $Date: 2004/01/25 21:30:41 $ + */ +public interface StatisticalSummary { + /** + * Returns the
    + * arithmetic mean of the available values + * @return The mean or Double.NaN if no values have been added. + */ + public abstract double getMean(); + /** + * Returns the variance of the available values. + * @return The variance, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public abstract double getVariance(); + /** + * Returns the standard deviation of the available values. + * @return The standard deviation, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public abstract double getStandardDeviation(); + /** + * Returns the maximum of the available values + * @return The max or Double.NaN if no values have been added. + */ + public abstract double getMax(); + /** + * Returns the minimum of the available values + * @return The min or Double.NaN if no values have been added. + */ + public abstract double getMin(); + /** + * Returns the number of available values + * @return The number of available values + */ + public abstract long getN(); + /** + * Returns the sum of the values that have been added to Univariate. + * @return The sum or Double.NaN if no values have been added + */ + public abstract double getSum(); +} \ No newline at end of file diff --git a/src/java/org/apache/commons/math/stat/StorelessDescriptiveStatisticsImpl.java b/src/java/org/apache/commons/math/stat/StorelessDescriptiveStatisticsImpl.java deleted file mode 100644 index 73f110aea..000000000 --- a/src/java/org/apache/commons/math/stat/StorelessDescriptiveStatisticsImpl.java +++ /dev/null @@ -1,208 +0,0 @@ -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2003 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, if - * any, must include the following acknowledgement: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgement may appear in the software itself, - * if and wherever such third-party acknowledgements normally appear. - * - * 4. The names "The Jakarta Project", "Commons", and "Apache Software - * Foundation" must not be used to endorse or promote products derived - * from this software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their name without prior written - * permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ -package org.apache.commons.math.stat; - -import java.io.Serializable; - -import org.apache.commons.math.stat.univariate.*; -import org.apache.commons.math.util.FixedDoubleArray; - -/** - * - * Accumulates univariate statistics for values fed in - * through the addValue() method. Does not store raw data values. - * All data are represented internally as doubles. - * Integers, floats and longs can be added, but they will be converted - * to doubles by addValue(). - * - * @version $Revision: 1.2 $ $Date: 2003/11/19 03:28:23 $ -*/ -public class StorelessDescriptiveStatisticsImpl extends AbstractStorelessDescriptiveStatistics implements Serializable { - - /** fixed storage */ - private FixedDoubleArray storage = null; - - /** Creates new univariate with an infinite window */ - public StorelessDescriptiveStatisticsImpl() { - super(); - } - - /** - * Creates a new univariate with a fixed window - * @param window Window Size - */ - public StorelessDescriptiveStatisticsImpl(int window) { - super(window); - storage = new FixedDoubleArray(window); - } - - /** - * If windowSize is set to Infinite, moments - * are calculated using the following - * - * recursive strategy - * . - * Otherwise, stat methods delegate to StatUtils. 
- * @see org.apache.commons.math.stat.Univariate#addValue(double) - */ - public void addValue(double value) { - - if (storage != null) { - /* then all getters deligate to StatUtils - * and this clause simply adds/rolls a value in the storage array - */ - if (getWindowSize() == n) { - storage.addElementRolling(value); - } else { - n++; - storage.addElement(value); - } - - } else { - /* If the windowSize is infinite don't store any values and there - * is no need to discard the influence of any single item. - */ - n++; - min.increment(value); - max.increment(value); - sum.increment(value); - sumsq.increment(value); - sumLog.increment(value); - geoMean.increment(value); - - moment.increment(value); - //mean.increment(value); - //variance.increment(value); - //skewness.increment(value); - //kurtosis.increment(value); - } - } - - /** - * Generates a text report displaying - * univariate statistics from values that - * have been added. - * @return String with line feeds displaying statistics - */ - public String toString() { - StringBuffer outBuffer = new StringBuffer(); - outBuffer.append("UnivariateImpl:\n"); - outBuffer.append("n: " + getN() + "\n"); - outBuffer.append("min: " + getMin() + "\n"); - outBuffer.append("max: " + getMax() + "\n"); - outBuffer.append("mean: " + getMean() + "\n"); - outBuffer.append("std dev: " + getStandardDeviation() + "\n"); - outBuffer.append("skewness: " + getSkewness() + "\n"); - outBuffer.append("kurtosis: " + getKurtosis() + "\n"); - return outBuffer.toString(); - } - - /** - * @see org.apache.commons.math.stat.Univariate#clear() - */ - public void clear() { - super.clear(); - if (getWindowSize() != INFINITE_WINDOW) { - storage = new FixedDoubleArray(getWindowSize()); - } - } - - /** - * Apply the given statistic to this univariate collection. - * @param stat the statistic to apply - * @return the computed value of the statistic. 
- */ - public double apply(UnivariateStatistic stat) { - - if (storage != null) { - return stat.evaluate( - storage.getValues(), - storage.start(), - storage.getNumElements()); - } else if (stat instanceof StorelessUnivariateStatistic) { - return ((StorelessUnivariateStatistic) stat).getResult(); - } - - return Double.NaN; - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.DescriptiveStatistics#getValues() - */ - public double[] getValues() { - throw new UnsupportedOperationException("Only Available with Finite Window"); - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.DescriptiveStatistics#getSortedValues() - */ - public double[] getSortedValues() { - throw new UnsupportedOperationException("Only Available with Finite Window"); - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.DescriptiveStatistics#getElement(int) - */ - public double getElement(int index) { - throw new UnsupportedOperationException("Only Available with Finite Window"); - } - - /* (non-Javadoc) - * @see org.apache.commons.math.stat.DescriptiveStatistics#getPercentile(double) - */ - public double getPercentile(double p) { - throw new UnsupportedOperationException("Only Available with Finite Window"); - } - -} \ No newline at end of file diff --git a/src/java/org/apache/commons/math/stat/SummaryStatistics.java b/src/java/org/apache/commons/math/stat/SummaryStatistics.java new file mode 100644 index 000000000..a58d00b68 --- /dev/null +++ b/src/java/org/apache/commons/math/stat/SummaryStatistics.java @@ -0,0 +1,170 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2004 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowledgement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgement may appear in the software itself, + * if and wherever such third-party acknowledgements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their name without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.math.stat; + +import java.io.Serializable; + +import org.apache.commons.discovery.tools.DiscoverClass; + +/** + * Abstract factory class for univariate statistical summaries. + * + * @version $Revision: 1.1 $ $Date: 2004/01/25 21:30:41 $ + */ +public abstract class SummaryStatistics implements Serializable, StatisticalSummary{ + + /** + * Create an instance of a SummaryStatistics + * @return a new factory. + */ + public static SummaryStatistics newInstance(String cls) throws InstantiationException, IllegalAccessException, ClassNotFoundException { + return newInstance(Class.forName(cls)); + } + /** + * Create an instance of a SummaryStatistics + * @return a new factory. + */ + public static SummaryStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException { + return (SummaryStatistics)cls.newInstance(); + } + + /** + * Create an instance of a SummaryStatistics + * @return a new factory. + */ + public static SummaryStatistics newInstance() { + SummaryStatistics factory = null; + try { + DiscoverClass dc = new DiscoverClass(); + factory = (SummaryStatistics) dc.newInstance( + SummaryStatistics.class, + "org.apache.commons.math.stat.SummaryStatisticsImpl"); + } catch(Exception ex) { + // NOTE(review): the exception is swallowed and no fallback is created, so null is returned. + } + return factory; + } + + /** + * Adds the value to the data to be summarized + * @param v the value to be added + */ + public abstract void addValue(double v); + + /** + * Returns the + * arithmetic mean of the available values + * @return The mean or Double.NaN if no values have been added.
+ */ + public abstract double getMean(); + + /** + * Returns the + * geometric mean of the available values + * @return The geometricMean, Double.NaN if no values have been added, + * or if the product of the available values is less than or equal to 0. + */ + public abstract double getGeometricMean(); + + /** + * Returns the variance of the available values. + * @return The variance, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public abstract double getVariance(); + + /** + * Returns the standard deviation of the available values. + * @return The standard deviation, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public abstract double getStandardDeviation(); + + /** + * Returns the maximum of the available values + * @return The max or Double.NaN if no values have been added. + */ + public abstract double getMax(); + + /** + * Returns the minimum of the available values + * @return The min or Double.NaN if no values have been added. + */ + public abstract double getMin(); + + /** + * Returns the number of available values + * @return The number of available values + */ + public abstract long getN(); + + /** + * Returns the sum of the values that have been added. + * @return The sum or Double.NaN if no values have been added + */ + public abstract double getSum(); + + /** + * Returns the sum of the squares of the available values. + * @return The sum of the squares or Double.NaN if no + * values have been added.
+ */ + public abstract double getSumsq(); + + /** + * Resets all statistics + */ + public abstract void clear(); + +} diff --git a/src/java/org/apache/commons/math/stat/AbstractStorelessDescriptiveStatistics.java b/src/java/org/apache/commons/math/stat/SummaryStatisticsImpl.java similarity index 51% rename from src/java/org/apache/commons/math/stat/AbstractStorelessDescriptiveStatistics.java rename to src/java/org/apache/commons/math/stat/SummaryStatisticsImpl.java index 0dbad4598..18fec3123 100644 --- a/src/java/org/apache/commons/math/stat/AbstractStorelessDescriptiveStatistics.java +++ b/src/java/org/apache/commons/math/stat/SummaryStatisticsImpl.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003-2004 The Apache Software Foundation. All rights + * Copyright (c) 2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,12 +53,10 @@ */ package org.apache.commons.math.stat; -import org.apache.commons.math.stat.univariate.UnivariateStatistic; -import org.apache.commons.math.stat.univariate.moment.FourthMoment; +import org.apache.commons.math.stat.univariate.moment.SecondMoment; +import org.apache.commons.math.stat.univariate.moment.FirstMoment; import org.apache.commons.math.stat.univariate.moment.GeometricMean; -import org.apache.commons.math.stat.univariate.moment.Kurtosis; import org.apache.commons.math.stat.univariate.moment.Mean; -import org.apache.commons.math.stat.univariate.moment.Skewness; import org.apache.commons.math.stat.univariate.moment.Variance; import org.apache.commons.math.stat.univariate.rank.Max; import org.apache.commons.math.stat.univariate.rank.Min; @@ -67,21 +65,20 @@ import org.apache.commons.math.stat.univariate.summary.SumOfLogs; import org.apache.commons.math.stat.univariate.summary.SumOfSquares; /** - * Provides a default {@link 
DescriptiveStatistics} implementation, including only statistics - * that can be computed in one pass through the data without storing the full set of sample - * data values. - * @version $Revision: 1.2 $ $Date: 2004/01/18 03:45:02 $ + * Provides a default {@link SummaryStatistics} implementation. + * + * @version $Revision: 1.1 $ $Date: 2004/01/25 21:30:41 $ */ -public abstract class AbstractStorelessDescriptiveStatistics extends DescriptiveStatistics { - - /** hold the window size **/ - protected int windowSize = INFINITE_WINDOW; +public class SummaryStatisticsImpl extends SummaryStatistics { /** count of values that have been added */ - protected int n = 0; + protected long n = 0; - /** FourthMoment is used in calculating mean, variance,skew and kurtosis */ - protected FourthMoment moment = null; + /** FirstMoment is used to compute the mean */ + protected FirstMoment firstMoment = null; + + /** SecondMoment is used to compute the variance */ + protected SecondMoment secondMoment = null; /** sum of values that have been added */ protected Sum sum = null; @@ -107,63 +104,41 @@ public abstract class AbstractStorelessDescriptiveStatistics extends Descriptive /** variance of values that have been added */ protected Variance variance = null; - /** skewness of values that have been added */ - protected Skewness skewness = null; - - /** kurtosis of values that have been added */ - protected Kurtosis kurtosis = null; - /** - * Construct an AbstractStorelessDescriptiveStatistics + * Construct a SummaryStatistics */ - public AbstractStorelessDescriptiveStatistics() { - super(); - + public SummaryStatisticsImpl() { sum = new Sum(); sumsq = new SumOfSquares(); min = new Min(); max = new Max(); sumLog = new SumOfLogs(); geoMean = new GeometricMean(); - - moment = new FourthMoment(); - mean = new Mean(moment); - variance = new Variance(moment); - skewness = new Skewness(moment); - kurtosis = new Kurtosis(moment); + secondMoment = new SecondMoment(); + firstMoment = new 
FirstMoment(); } /** - * Construct an AbstractStorelessDescriptiveStatistics with a window - * @param window The Window Size + * Add a value to the data + * + * @param value the value to add */ - public AbstractStorelessDescriptiveStatistics(int window) { - this(); - setWindowSize(window); + public void addValue(double value) { + sum.increment(value); + sumsq.increment(value); + min.increment(value); + max.increment(value); + sumLog.increment(value); + geoMean.increment(value); + firstMoment.increment(value); + secondMoment.increment(value); + n++; } - /** - * Apply the given statistic to this univariate collection. - * @param stat the statistic to apply - * @return the computed value of the statistic. - */ - public abstract double apply(UnivariateStatistic stat); - - - /** - * If windowSize is set to Infinite, - * statistics are calculated using the following - * - * recursive strategy - * . - * @see org.apache.commons.math.stat.Univariate#addValue(double) - */ - public abstract void addValue(double value); - /** * @see org.apache.commons.math.stat.Univariate#getN() */ - public int getN() { + public long getN() { return n; } @@ -171,26 +146,37 @@ public abstract class AbstractStorelessDescriptiveStatistics extends Descriptive * @see org.apache.commons.math.stat.Univariate#getSum() */ public double getSum() { - return apply(sum); + return sum.getResult(); } /** - * @see org.apache.commons.math.stat.Univariate#getSumsq() + * Returns the sum of the squares of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    + * + * @return The sum of squares */ public double getSumsq() { - return apply(sumsq); + return sumsq.getResult(); } /** - * @see org.apache.commons.math.stat.Univariate#getMean() + * Returns the mean of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    + * + * @return the mean */ public double getMean() { - return apply(mean); + return new Mean(firstMoment).getResult(); } /** - * Returns the standard deviation for this collection of values - * @see org.apache.commons.math.stat.Univariate#getStandardDeviation() + * Returns the standard deviation of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    + * + * @return the standard deviation */ public double getStandardDeviation() { double stdDev = Double.NaN; @@ -205,104 +191,69 @@ public abstract class AbstractStorelessDescriptiveStatistics extends Descriptive } /** - * Returns the variance of the values that have been added via West's - * algorithm as described by - * Chan, T. F. and - * J. G. Lewis 1979, Communications of the ACM, - * vol. 22 no. 9, pp. 526-531.. + * Returns the variance of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    * - * @return The variance of a set of values. - * Double.NaN is returned for an empty - * set of values and 0.0 is returned for - * a <= 1 value set. + * @return the variance */ public double getVariance() { - return apply(variance); + return new Variance(secondMoment).getResult(); } /** - * Returns the skewness of the values that have been added as described by - * - * Equation (6) for k-Statistics. - * @return The skew of a set of values. Double.NaN is returned for - * an empty set of values and 0.0 is returned for a - * <= 2 value set. - */ - public double getSkewness() { - return apply(skewness); - } - - /** - * Returns the kurtosis of the values that have been added as described by - * - * Equation (7) for k-Statistics. + * Returns the maximum of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    * - * @return The kurtosis of a set of values. Double.NaN is returned for - * an empty set of values and 0.0 is returned for a <= 3 - * value set. - */ - public double getKurtosis() { - return apply(kurtosis); - } - - /** - * @see org.apache.commons.math.stat.DescriptiveStatistics#getKurtosisClass() - */ - public int getKurtosisClass() { - int kClass = MESOKURTIC; - - double kurtosis = getKurtosis(); - if (kurtosis > 0) { - kClass = LEPTOKURTIC; - } else if (kurtosis < 0) { - kClass = PLATYKURTIC; - } - return (kClass); - } - - /** - * @see org.apache.commons.math.stat.Univariate#getMax() + * @return the maximum */ public double getMax() { - return apply(max); + return max.getResult(); } /** - * @see org.apache.commons.math.stat.Univariate#getMin() + * Returns the minimum of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    + * + * @return the minimum */ public double getMin() { - return apply(min); + return min.getResult(); } /** - * @see org.apache.commons.math.stat.Univariate#getGeometricMean() - */ + * Returns the geometric mean of the values that have been added. + *

    + * Double.NaN is returned if no values have been added.

    + * + * @return the geometric mean + */ public double getGeometricMean() { - return apply(geoMean); + return geoMean.getResult(); } /** * Generates a text report displaying - * univariate statistics from values that + * summary statistics from values that * have been added. * @return String with line feeds displaying statistics */ public String toString() { StringBuffer outBuffer = new StringBuffer(); - outBuffer.append("UnivariateImpl:\n"); + outBuffer.append("SummaryStatistics:\n"); outBuffer.append("n: " + n + "\n"); outBuffer.append("min: " + min + "\n"); outBuffer.append("max: " + max + "\n"); outBuffer.append("mean: " + getMean() + "\n"); outBuffer.append("std dev: " + getStandardDeviation() + "\n"); - outBuffer.append("skewness: " + getSkewness() + "\n"); - outBuffer.append("kurtosis: " + getKurtosis() + "\n"); return outBuffer.toString(); } - /** - * @see org.apache.commons.math.stat.Univariate#clear() - */ + /** + * Resets all statistics and storage + */ public void clear() { this.n = 0; min.clear(); @@ -311,27 +262,8 @@ public abstract class AbstractStorelessDescriptiveStatistics extends Descriptive sumLog.clear(); sumsq.clear(); geoMean.clear(); - - moment.clear(); - mean.clear(); - variance.clear(); - skewness.clear(); - kurtosis.clear(); - } - - /** - * @see org.apache.commons.math.stat.Univariate#getWindowSize() - */ - public int getWindowSize() { - return windowSize; - } - - /** - * @see org.apache.commons.math.stat.Univariate#setWindowSize(int) - */ - public void setWindowSize(int windowSize) { - clear(); - this.windowSize = windowSize; + firstMoment.clear(); + secondMoment.clear(); } } \ No newline at end of file diff --git a/src/java/org/apache/commons/math/stat/TestStatistic.java b/src/java/org/apache/commons/math/stat/TestStatistic.java index 1bb68cd58..26d1cc898 100644 --- a/src/java/org/apache/commons/math/stat/TestStatistic.java +++ b/src/java/org/apache/commons/math/stat/TestStatistic.java @@ -1,7 +1,7 @@ /* 
==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,7 +58,7 @@ import org.apache.commons.math.MathException; /** * A collection of commonly used test statistics and statistical tests. * - * @version $Revision: 1.10 $ $Date: 2003/11/19 03:22:54 $ + * @version $Revision: 1.11 $ $Date: 2004/01/25 21:30:41 $ */ public interface TestStatistic { @@ -356,7 +356,7 @@ public interface TestStatistic { * @return t statistic * @throws IllegalArgumentException if the precondition is not met */ - double t(double mu, DescriptiveStatistics sampleStats) + double t(double mu, StatisticalSummary sampleStats) throws IllegalArgumentException, MathException; /** @@ -377,7 +377,7 @@ public interface TestStatistic { * @return t statistic * @throws IllegalArgumentException if the precondition is not met */ - double t(DescriptiveStatistics sampleStats1, DescriptiveStatistics sampleStats2) + double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) throws IllegalArgumentException, MathException; /** @@ -406,12 +406,12 @@ public interface TestStatistic { * at least 5 observations. 
* * - * @param sampleStats1 DescriptiveStatistics describing data from the first sample - * @param sampleStats2 DescriptiveStatistics describing data from the second sample + * @param sampleStats1 StatisticalSummary describing data from the first sample + * @param sampleStats2 StatisticalSummary describing data from the second sample * @return p-value for t-test * @throws IllegalArgumentException if the precondition is not met */ - double tTest(DescriptiveStatistics sampleStats1, DescriptiveStatistics sampleStats2) + double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) throws IllegalArgumentException, MathException; /** @@ -453,14 +453,14 @@ public interface TestStatistic { *
  • 0 < alpha < 0.5 *
  • * - * @param sampleStats1 DescriptiveStatistics describing sample data values - * @param sampleStats2 DescriptiveStatistics describing sample data values + * @param sampleStats1 StatisticalSummary describing sample data values + * @param sampleStats2 StatisticalSummary describing sample data values * @param alpha significance level of the test * @return true if the null hypothesis can be rejected with * confidence 1 - alpha * @throws IllegalArgumentException if the preconditions are not met */ - boolean tTest(DescriptiveStatistics sampleStats1, DescriptiveStatistics sampleStats2, + boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2, double alpha) throws IllegalArgumentException, MathException; @@ -495,12 +495,12 @@ public interface TestStatistic { * * * @param mu constant value to compare sample mean against - * @param sampleStats DescriptiveStatistics describing sample data values + * @param sampleStats StatisticalSummary describing sample data values * @param alpha significance level of the test * @return p-value * @throws IllegalArgumentException if the precondition is not met */ - boolean tTest(double mu, DescriptiveStatistics sampleStats, double alpha) + boolean tTest(double mu, StatisticalSummary sampleStats, double alpha) throws IllegalArgumentException, MathException; /** @@ -526,11 +526,11 @@ public interface TestStatistic { * * * @param mu constant value to compare sample mean against - * @param sampleStats DescriptiveStatistics describing sample data + * @param sampleStats StatisticalSummary describing sample data * @return p-value * @throws IllegalArgumentException if the precondition is not met */ - double tTest(double mu, DescriptiveStatistics sampleStats) + double tTest(double mu, StatisticalSummary sampleStats) throws IllegalArgumentException, MathException; } diff --git a/src/java/org/apache/commons/math/stat/TestStatisticImpl.java b/src/java/org/apache/commons/math/stat/TestStatisticImpl.java index 
2121eb50b..a2eed4422 100644 --- a/src/java/org/apache/commons/math/stat/TestStatisticImpl.java +++ b/src/java/org/apache/commons/math/stat/TestStatisticImpl.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ import org.apache.commons.math.distribution.ChiSquaredDistribution; /** * Implements test statistics defined in the TestStatistic interface. * - * @version $Revision: 1.10 $ $Date: 2003/11/19 03:22:54 $ + * @version $Revision: 1.11 $ $Date: 2004/01/25 21:30:41 $ */ public class TestStatisticImpl implements TestStatistic, Serializable { @@ -255,11 +255,11 @@ public class TestStatisticImpl implements TestStatistic, Serializable { /** * @param mu comparison constant - * @param sampleStats DescriptiveStatistics holding sample summary statitstics + * @param sampleStats StatisticalSummary holding sample summary statitstics * @return t statistic * @throws IllegalArgumentException if the precondition is not met */ - public double t(double mu, DescriptiveStatistics sampleStats) + public double t(double mu, StatisticalSummary sampleStats) throws IllegalArgumentException { if ((sampleStats == null) || (sampleStats.getN() < 5)) { throw new IllegalArgumentException("insufficient data for t statistic"); @@ -272,14 +272,14 @@ public class TestStatisticImpl implements TestStatistic, Serializable { } /** - * @param sampleStats1 DescriptiveStatistics describing data from the first sample - * @param sampleStats2 DescriptiveStatistics describing data from the second sample + * @param sampleStats1 StatisticalSummary describing data from the first sample + * @param sampleStats2 StatisticalSummary describing data from the second sample * @return t statistic * 
@throws IllegalArgumentException if the precondition is not met */ public double t( - DescriptiveStatistics sampleStats1, - DescriptiveStatistics sampleStats2) + StatisticalSummary sampleStats1, + StatisticalSummary sampleStats2) throws IllegalArgumentException { if ((sampleStats1 == null) || (sampleStats2 == null @@ -296,14 +296,14 @@ public class TestStatisticImpl implements TestStatistic, Serializable { } /** - * @param sampleStats1 DescriptiveStatistics describing data from the first sample - * @param sampleStats2 DescriptiveStatistics describing data from the second sample + * @param sampleStats1 StatisticalSummary describing data from the first sample + * @param sampleStats2 StatisticalSummary describing data from the second sample * @return p-value for t-test * @throws IllegalArgumentException if the precondition is not met */ public double tTest( - DescriptiveStatistics sampleStats1, - DescriptiveStatistics sampleStats2) + StatisticalSummary sampleStats1, + StatisticalSummary sampleStats2) throws IllegalArgumentException, MathException { if ((sampleStats1 == null) || (sampleStats2 == null @@ -320,16 +320,16 @@ public class TestStatisticImpl implements TestStatistic, Serializable { } /** - * @param sampleStats1 DescriptiveStatistics describing sample data values - * @param sampleStats2 DescriptiveStatistics describing sample data values + * @param sampleStats1 StatisticalSummary describing sample data values + * @param sampleStats2 StatisticalSummary describing sample data values * @param alpha significance level of the test * @return true if the null hypothesis can be rejected with * confidence 1 - alpha * @throws IllegalArgumentException if the preconditions are not met */ public boolean tTest( - DescriptiveStatistics sampleStats1, - DescriptiveStatistics sampleStats2, + StatisticalSummary sampleStats1, + StatisticalSummary sampleStats2, double alpha) throws IllegalArgumentException, MathException { if ((alpha <= 0) || (alpha > 0.5)) { @@ -341,14 +341,14 
@@ public class TestStatisticImpl implements TestStatistic, Serializable { /** * @param mu constant value to compare sample mean against - * @param sampleStats DescriptiveStatistics describing sample data values + * @param sampleStats StatisticalSummary describing sample data values * @param alpha significance level of the test * @return p-value * @throws IllegalArgumentException if the precondition is not met */ public boolean tTest( double mu, - DescriptiveStatistics sampleStats, + StatisticalSummary sampleStats, double alpha) throws IllegalArgumentException, MathException { if ((alpha <= 0) || (alpha > 0.5)) { @@ -360,11 +360,11 @@ public class TestStatisticImpl implements TestStatistic, Serializable { /** * @param mu constant value to compare sample mean against - * @param sampleStats DescriptiveStatistics describing sample data + * @param sampleStats StatisticalSummary describing sample data * @return p-value * @throws IllegalArgumentException if the precondition is not met */ - public double tTest(double mu, DescriptiveStatistics sampleStats) + public double tTest(double mu, StatisticalSummary sampleStats) throws IllegalArgumentException, MathException { if ((sampleStats == null) || (sampleStats.getN() < 5)) { throw new IllegalArgumentException("insufficient data for t statistic"); diff --git a/src/java/org/apache/commons/math/stat/univariate/moment/FirstMoment.java b/src/java/org/apache/commons/math/stat/univariate/moment/FirstMoment.java index 22b857ad7..5928a4276 100644 --- a/src/java/org/apache/commons/math/stat/univariate/moment/FirstMoment.java +++ b/src/java/org/apache/commons/math/stat/univariate/moment/FirstMoment.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -64,14 +64,14 @@ import org.apache.commons.math.stat.univariate.AbstractStorelessUnivariateStatis * * recursive strategy * . Both incremental and evaluation strategies currently use this approach. - * @version $Revision: 1.11 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.12 $ $Date: 2004/01/25 21:30:41 $ */ public class FirstMoment extends AbstractStorelessUnivariateStatistic implements Serializable{ - static final long serialVersionUID = -803343206421984070L; + static final long serialVersionUID = -803343206421984070L; /** count of values that have been added */ - protected int n = 0; + protected long n = 0; /** first moment of values that have been added */ protected double m1 = Double.NaN; diff --git a/src/java/org/apache/commons/math/stat/univariate/moment/GeometricMean.java b/src/java/org/apache/commons/math/stat/univariate/moment/GeometricMean.java index f9412554d..1154b34ed 100644 --- a/src/java/org/apache/commons/math/stat/univariate/moment/GeometricMean.java +++ b/src/java/org/apache/commons/math/stat/univariate/moment/GeometricMean.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -60,14 +60,14 @@ import org.apache.commons.math.stat.univariate.summary.SumOfLogs; /** * Returns the * geometric mean of the available values - * @version $Revision: 1.14 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.15 $ $Date: 2004/01/25 21:30:41 $ */ public class GeometricMean extends SumOfLogs implements Serializable{ static final long serialVersionUID = -8178734905303459453L; /** */ - protected int n = 0; + protected long n = 0; /** */ private double geoMean = Double.NaN; diff --git a/src/java/org/apache/commons/math/stat/univariate/moment/Kurtosis.java b/src/java/org/apache/commons/math/stat/univariate/moment/Kurtosis.java index e7283c9cf..bb0cdbb5c 100644 --- a/src/java/org/apache/commons/math/stat/univariate/moment/Kurtosis.java +++ b/src/java/org/apache/commons/math/stat/univariate/moment/Kurtosis.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ import org .AbstractStorelessUnivariateStatistic; /** - * @version $Revision: 1.14 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.15 $ $Date: 2004/01/25 21:30:41 $ */ public class Kurtosis extends AbstractStorelessUnivariateStatistic implements Serializable { @@ -80,7 +80,7 @@ public class Kurtosis extends AbstractStorelessUnivariateStatistic implements Se private double kurtosis = Double.NaN; /** */ - private int n = 0; + private long n = 0; /** * Construct a Kurtosis diff --git a/src/java/org/apache/commons/math/stat/univariate/moment/Skewness.java b/src/java/org/apache/commons/math/stat/univariate/moment/Skewness.java index 51ec3bd19..ba57df5fe 100644 --- a/src/java/org/apache/commons/math/stat/univariate/moment/Skewness.java +++ b/src/java/org/apache/commons/math/stat/univariate/moment/Skewness.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -58,7 +58,7 @@ import java.io.Serializable; import org.apache.commons.math.stat.univariate.AbstractStorelessUnivariateStatistic; /** - * @version $Revision: 1.14 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.15 $ $Date: 2004/01/25 21:30:41 $ */ public class Skewness extends AbstractStorelessUnivariateStatistic implements Serializable { @@ -74,7 +74,7 @@ public class Skewness extends AbstractStorelessUnivariateStatistic implements Se protected double skewness = Double.NaN; /** */ - private int n = 0; + private long n = 0; /** * Constructs a Skewness diff --git a/src/java/org/apache/commons/math/stat/univariate/moment/Variance.java b/src/java/org/apache/commons/math/stat/univariate/moment/Variance.java index ed02484f5..2fb77a16c 100644 --- a/src/java/org/apache/commons/math/stat/univariate/moment/Variance.java +++ b/src/java/org/apache/commons/math/stat/univariate/moment/Variance.java @@ -58,8 +58,12 @@ import java.io.Serializable; import org.apache.commons.math.stat.univariate.AbstractStorelessUnivariateStatistic; /** - * - * @version $Revision: 1.14 $ $Date: 2003/11/19 03:28:24 $ + * Updating formulas use West's algorithm as described in + * Chan, T. F. and + * J. G. Lewis 1979, Communications of the ACM, + * vol. 22 no. 9, pp. 526-531. + * + * @version $Revision: 1.15 $ $Date: 2004/01/25 21:30:41 $ */ public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable { @@ -87,7 +91,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * If the external SecondMoment is used, the this is updated from * that moments counter */ - protected int n = 0; + protected long n = 0; /** * Constructs a Variance.
diff --git a/src/java/org/apache/commons/math/stat/univariate/rank/Max.java b/src/java/org/apache/commons/math/stat/univariate/rank/Max.java index e1426dbb1..ecbf4bea5 100644 --- a/src/java/org/apache/commons/math/stat/univariate/rank/Max.java +++ b/src/java/org/apache/commons/math/stat/univariate/rank/Max.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -64,14 +64,14 @@ import org .AbstractStorelessUnivariateStatistic; /** - * @version $Revision: 1.12 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.13 $ $Date: 2004/01/25 21:30:41 $ */ public class Max extends AbstractStorelessUnivariateStatistic implements Serializable { static final long serialVersionUID = -5593383832225844641L; /** */ - private int n = 0; + private long n = 0; /** */ private double value = Double.NaN; diff --git a/src/java/org/apache/commons/math/stat/univariate/rank/Min.java b/src/java/org/apache/commons/math/stat/univariate/rank/Min.java index 5b215a53f..f7a7cc9d2 100644 --- a/src/java/org/apache/commons/math/stat/univariate/rank/Min.java +++ b/src/java/org/apache/commons/math/stat/univariate/rank/Min.java @@ -1,7 +1,7 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2003 The Apache Software Foundation. All rights + * Copyright (c) 2003-2004 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -64,14 +64,14 @@ import org .AbstractStorelessUnivariateStatistic; /** - * @version $Revision: 1.12 $ $Date: 2003/11/19 03:28:24 $ + * @version $Revision: 1.13 $ $Date: 2004/01/25 21:30:41 $ */ public class Min extends AbstractStorelessUnivariateStatistic implements Serializable { static final long serialVersionUID = -2941995784909003131L; /** */ - private int n = 0; + private long n = 0; /** */ private double value = Double.NaN; diff --git a/src/test/org/apache/commons/math/random/EmpiricalDistributionTest.java b/src/test/org/apache/commons/math/random/EmpiricalDistributionTest.java index 536cb9f10..446b319d9 100644 --- a/src/test/org/apache/commons/math/random/EmpiricalDistributionTest.java +++ b/src/test/org/apache/commons/math/random/EmpiricalDistributionTest.java @@ -60,13 +60,12 @@ import java.io.File; import java.net.URL; import java.net.URLDecoder; -import org.apache.commons.math.stat.DescriptiveStatistics; -import org.apache.commons.math.stat.StorelessDescriptiveStatisticsImpl; +import org.apache.commons.math.stat.SummaryStatistics; /** * Test cases for the EmpiricalDistribution class * - * @version $Revision: 1.10 $ $Date: 2004/01/15 05:22:08 $ + * @version $Revision: 1.11 $ $Date: 2004/01/25 21:30:41 $ */ public final class EmpiricalDistributionTest extends TestCase { @@ -150,7 +149,7 @@ public final class EmpiricalDistributionTest extends TestCase { private void tstGen(double tolerance)throws Exception { empiricalDistribution.load(file); - DescriptiveStatistics stats = new StorelessDescriptiveStatisticsImpl(); + SummaryStatistics stats = SummaryStatistics.newInstance(); for (int i = 1; i < 1000; i++) { stats.addValue(empiricalDistribution.getNextValue()); } diff --git a/src/test/org/apache/commons/math/random/RandomDataTest.java b/src/test/org/apache/commons/math/random/RandomDataTest.java index 0af605430..34fca0b61 100644 --- 
a/src/test/org/apache/commons/math/random/RandomDataTest.java +++ b/src/test/org/apache/commons/math/random/RandomDataTest.java @@ -61,14 +61,13 @@ import java.security.NoSuchAlgorithmException; import java.util.HashSet; import org.apache.commons.math.stat.Frequency; -import org.apache.commons.math.stat.StorelessDescriptiveStatisticsImpl; +import org.apache.commons.math.stat.SummaryStatistics; import org.apache.commons.math.stat.TestStatisticImpl; -import org.apache.commons.math.stat.DescriptiveStatistics; /** * Test cases for the RandomData class. * - * @version $Revision: 1.8 $ $Date: 2003/11/15 16:01:40 $ + * @version $Revision: 1.9 $ $Date: 2004/01/25 21:30:41 $ */ public final class RandomDataTest extends TestCase { @@ -405,7 +404,7 @@ public final class RandomDataTest extends TestCase { } catch (IllegalArgumentException ex) { ; } - DescriptiveStatistics u = new StorelessDescriptiveStatisticsImpl(); + SummaryStatistics u = SummaryStatistics.newInstance(); for (int i = 0; i