This house-cleaning improves UnivariateImpl in a number of ways.

1.) insertValue is abolished and its contents are now in addValue

2.) UnivariateImpl now extends AbstractStoreUnivariate so that it can delegate to
 those methods directly for calculating statistics when storage is active; all methods
 delegate to AbstractStoreUnivariate when the DoubleArray is no longer null. This also means
that a majority of the StoreUnivariate interface is now implemented in UnivariateImpl to provide
delegates when storage is active, and to throw runtime exceptions when it's not (this at least until
we establish rolling implementations for those methods). We should consider consolidating the
StoreUnivariate interface into the Univariate interface.

3.) Calculations in addValue have been reorganized; only calculations for
the storageless solution are now present in this class. Otherwise the value is
added to (or rolled into) the DoubleArray when appropriate.

I'm satisfied that it passes all Unit tests.


git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140910 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark R. Diggory 2003-06-16 20:42:24 +00:00
parent 464aac515f
commit 1be3d32358
1 changed files with 335 additions and 299 deletions

View File

@ -71,352 +71,388 @@ import org.apache.commons.math.FixedDoubleArray;
* @author <a href="mailto:mdiggory@apache.org">Mark Diggory</a>
* @author Brent Worden
* @author <a href="mailto:HotFusionMan@Yahoo.com">Albert Davidson Chou</a>
* @version $Revision: 1.5 $ $Date: 2003/06/16 14:29:30 $
* @version $Revision: 1.6 $ $Date: 2003/06/16 20:42:24 $
*
*/
public class UnivariateImpl implements Univariate, Serializable {
public class UnivariateImpl
extends AbstractStoreUnivariate
implements Univariate, Serializable {
/** hold the window size **/
private int windowSize = Univariate.INFINITE_WINDOW;
/** hold the window size **/
private int windowSize = Univariate.INFINITE_WINDOW;
/** Just in case the windowSize is not infinite, we need to
* keep an array to remember values 0 to N
*/
private DoubleArray doubleArray;
/** Just in case the windowSize is not infinite, we need to
* keep an array to remember values 0 to N
*/
private DoubleArray doubleArray;
/** count of values that have been added */
private int n = 0;
/** count of values that have been added */
private int n = 0;
/** min of values that have been added */
private double min = Double.MAX_VALUE;
/** sum of values that have been added */
private double sum = Double.NaN;
/** max of values that have been added */
private double max = Double.MIN_VALUE;
/** sum of the square of each value that has been added */
private double sumsq = Double.NaN;
/** product of values that have been added */
private double product = Double.NaN;
/** sum of the Cube of each value that has been added */
private double sumCube = Double.NaN;
/** mean of values that have been added */
private double mean = Double.NaN ;
/** sum of the Quadrate of each value that has been added */
private double sumQuad = Double.NaN;
/** running ( variance * (n - 1) ) of values that have been added */
private double pre_variance = Double.NaN ;
/** min of values that have been added */
private double min = Double.NaN;
/** variance of values that have been added */
private double variance = Double.NaN ;
/** max of values that have been added */
private double max = Double.NaN;
/** running sum of values that have been added */
private double sum = 0.0;
/** product of values that have been added */
private double product = Double.NaN;
/** running sum of squares that have been added */
private double sumsq = 0.0;
/** mean of values that have been added */
private double mean = Double.NaN;
/** running sum of 3rd powers that have been added */
private double sumCube = 0.0;
/** running ( variance * (n - 1) ) of values that have been added */
private double pre_variance = Double.NaN;
/** running sum of 4th powers that have been added */
private double sumQuad = 0.0;
/** variance of values that have been added */
private double variance = Double.NaN;
/** Creates new univariate with an infinite window */
public UnivariateImpl() {
clear();
}
/** Creates new univariate with an infinite window */
public UnivariateImpl() {
super();
}
/** Creates a new univariate with a fixed window **/
public UnivariateImpl(int window) {
windowSize = window;
doubleArray = new FixedDoubleArray( window );
}
/** Creates a new univariate with a fixed window **/
public UnivariateImpl(int window) {
super();
setWindowSize(window);
}
/**
* @see org.apache.commons.math.stat.Univariate#addValue(double)
*/
public void addValue(double v) {
insertValue(v);
}
/** Getter for property n.
* @return Value of property n.
*/
public int getN() {
return n;
}
/**
* @see org.apache.commons.math.stat.Univariate#getMean()
*/
public double getMean() {
return mean ;
}
/**
* Returns the sum of all values contained herein
* @see org.apache.commons.math.stat.Univariate#getSum()
*/
public double getSum() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getSum();
}
/**
* @see org.apache.commons.math.stat.Univariate#getGeometricMean()
*/
public double getGeometricMean() {
if ((product <= 0.0) || (n == 0)) {
return Double.NaN;
} else {
return Math.pow(product,( 1.0 / (double) n ) );
}
}
return sum;
}
/**
* @see org.apache.commons.math.stat.Univariate#getProduct()
*/
public double getProduct() {
return product;
}
/**
* Returns the sun of the squares of all values contained herein
* @see org.apache.commons.math.stat.Univariate#getSumsq()
*/
public double getSumsq() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getSumsq();
}
/**
* @see org.apache.commons.math.stat.Univariate#getStandardDeviation()
*/
public double getStandardDeviation() {
double variance = getVariance();
return sumsq;
}
if ((variance == 0.0) || (variance == Double.NaN)) {
return variance;
} else {
return Math.sqrt(variance);
}
}
/**
* @see org.apache.commons.math.stat.Univariate#getMean()
*/
public double getMean() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getMean();
}
/**
* Returns the variance of the values that have been added via West's
* algorithm as described by
* <a href="http://doi.acm.org/10.1145/359146.359152">Chan, T. F. and
* J. G. Lewis 1979, <i>Communications of the ACM</i>,
* vol. 22 no. 9, pp. 526-531.</a>.
*
* @return The variance of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 1 value set.
*/
public double getVariance() {
return variance ;
}
return mean;
}
/**
* Returns the skewness of the values that have been added as described by
* <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (6) for k-Statistics</a>.
*
* @return The skew of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 2 value set.
*/
public double getSkewness() {
/**
* Returns the variance of the values that have been added via West's
* algorithm as described by
* <a href="http://doi.acm.org/10.1145/359146.359152">Chan, T. F. and
* J. G. Lewis 1979, <i>Communications of the ACM</i>,
* vol. 22 no. 9, pp. 526-531.</a>.
*
* @return The variance of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 1 value set.
*/
public double getVariance() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getVariance();
}
if( n < 1) return Double.NaN;
if( n <= 2 ) return 0.0;
return variance;
}
return ( 2 * Math.pow(sum, 3) - 3 * sum * sumsq + ((double) (n * n)) * sumCube ) /
( (double) (n * (n - 1) * (n - 2)) ) ;
}
/**
* Returns the skewness of the values that have been added as described by
* <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (6) for k-Statistics</a>.
*
* @return The skew of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 2 value set.
*/
public double getSkewness() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getSkewness();
}
/**
* Returns the kurtosis of the values that have been added as described by
* <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (7) for k-Statistics</a>.
*
* @return The kurtosis of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 3 value set.
*/
public double getKurtosis() {
if (n == 0) {
return Double.NaN;
}
if( n < 1) return Double.NaN;
if( n <= 3 ) return 0.0;
if (n <= 2) {
/* if n <= 2, skewness to 0.0 */
return 0.0;
} else {
/* else calc the skewness */
return (
2 * Math.pow(sum, 3)
- 3 * sum * sumsq
+ ((double) (n * n)) * sumCube)
/ ((double) (n * (n - 1) * (n - 2)));
}
}
double x1 = -6 * Math.pow(sum, 4);
double x2 = 12 * ((double) n) * Math.pow(sum, 2) * sumsq;
double x3 = -3 * ((double) (n * (n - 1))) * Math.pow(sumsq,2);
double x4 = -4 * ((double) (n * (n + 1))) * sum * sumCube;
double x5 = Math.pow(((double) n),2) * ((double) (n+1)) * sumQuad;
/**
* Returns the kurtosis of the values that have been added as described by
* <a href="http://mathworld.wolfram.com/k-Statistic.html">Equation (7) for k-Statistics</a>.
*
* @return The kurtosis of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a &lt;= 3 value set.
*/
public double getKurtosis() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getKurtosis();
}
return (x1 + x2 + x3 + x4 + x5) /
( (double) (n * (n - 1) * (n - 2) * (n - 3)) );
}
if (n == 0) {
return Double.NaN;
}
/**
* Called in "addValue" to insert a new value into the statistic.
* @param v The value to be added.
*/
private void insertValue(double v) {
// The default value of product is NaN, if you
// try to retrieve the product for a univariate with
// no values, we return NaN.
//
// If this is the first call to insertValue, we want
// to set product to 1.0, so that our first element
// is not "cancelled" out by the NaN.
//
// For the first value added, the mean is that value,
// and the variance is zero.
if( n == 0 ) {
product = 1.0 ;
mean = v ;
pre_variance = 0.0 ;
variance = 0.0 ;
}
if (n <= 3) {
/* if n <= 3, kurtosis to 0.0 */
return 0.0;
} else {
/* calc the kurtosis */
double x1 = -6 * Math.pow(sum, 4);
double x2 = 12 * ((double) n) * Math.pow(sum, 2) * sumsq;
double x3 = -3 * ((double) (n * (n - 1))) * Math.pow(sumsq, 2);
double x4 = -4 * ((double) (n * (n + 1))) * sum * sumCube;
double x5 =
Math.pow(((double) n), 2) * ((double) (n + 1)) * sumQuad;
if( windowSize != Univariate.INFINITE_WINDOW ) {
if( windowSize == n ) {
double discarded = doubleArray.addElementRolling( v );
return (x1 + x2 + x3 + x4 + x5)
/ ((double) (n * (n - 1) * (n - 2) * (n - 3)));
}
}
// Remove the influence of the discarded
sum -= discarded;
sumsq -= discarded * discarded;
sumCube -= Math.pow(discarded, 3);
sumQuad -= Math.pow(discarded, 4);
/** Getter for property max.
* @return Value of property max.
*/
public double getMax() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getMax();
}
if(discarded == min) {
min = doubleArray.getMin();
} else if(discarded == max){
max = doubleArray.getMax();
}
return max;
}
if(product != 0.0){
// can safely remove discarded value
product *= v / discarded;
} else if(discarded == 0.0){
// need to recompute product
product = 1.0;
double[] elements = doubleArray.getElements();
for( int i = 0; i < elements.length; i++ ) {
product *= elements[i];
}
} // else product = 0 and will still be 0 after discard
/** Getter for property min.
* @return Value of property min.
*/
public double getMin() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getMin();
}
} else {
doubleArray.addElement( v );
n += 1 ;
if (v < min) {
min = v;
}
if (v > max) {
max = v;
}
product *= v;
}
} else {
// If the windowSize is infinite please don't take the time to
// worry about storing any values. We don't need to discard the
// influence of any single item.
n += 1 ;
if (v < min) {
min = v;
}
if (v > max) {
max = v;
}
product *= v;
return min;
}
if ( n > 1 )
{
double deviationFromMean = v - mean ;
double deviationFromMean_overN = deviationFromMean / n ;
mean += deviationFromMean_overN ;
pre_variance += (n - 1) * deviationFromMean * deviationFromMean_overN ;
variance = pre_variance / (n - 1) ;
}
}
/**
* @see org.apache.commons.math.stat.Univariate#getProduct()
*/
public double getProduct() {
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getProduct();
}
sum += v;
sumsq += v * v;
sumCube += Math.pow(v,3);
sumQuad += Math.pow(v,4);
}
return product;
}
/** Getter for property max.
* @return Value of property max.
*/
public double getMax() {
if (n == 0) {
return Double.NaN;
} else {
return max;
}
}
/**
* @see org.apache.commons.math.stat.Univariate#getGeometricMean()
*/
public double getGeometricMean() {
/** Getter for property min.
* @return Value of property min.
*/
public double getMin() {
if (n == 0) {
return Double.NaN;
} else {
return min;
}
}
if (windowSize != Univariate.INFINITE_WINDOW) {
return super.getGeometricMean();
}
/** Getter for property n.
* @return Value of property n.
*/
public int getN() {
return n;
}
if ((product <= 0.0) || (n == 0)) {
return Double.NaN;
} else {
return Math.pow(product, (1.0 / (double) n));
}
}
/** Getter for property sum.
* @return Value of property sum.
*/
public double getSum() {
return sum;
}
/* (non-Javadoc)
* @see org.apache.commons.math.stat.StoreUnivariate#getMode()
*/
public double getMode() {
if (windowSize == Univariate.INFINITE_WINDOW) {
throw new RuntimeException("Mode is only available if windowSize is fixed");
}
/** Getter for property sumsq.
* @return Value of property sumsq.
*/
public double getSumsq() {
return sumsq;
}
return super.getMode();
}
/** Getter for property sumCube.
* @return Value of property sumCube.
*/
public double getSumCube() {
return sumCube;
}
/* (non-Javadoc)
* @see org.apache.commons.math.stat.StoreUnivariate#getPercentile(double)
*/
public double getPercentile(double p) {
if (windowSize == Univariate.INFINITE_WINDOW) {
throw new RuntimeException("Percentiles are only available if windowSize is fixed");
}
/** Getter for property sumQuad.
* @return Value of property sumQuad.
*/
public double getSumQuad() {
return sumQuad;
}
return super.getPercentile(p);
/**
* Generates a text report displaying
* univariate statistics from values that
* have been added.
* @return String with line feeds displaying statistics
*/
public String toString() {
StringBuffer outBuffer = new StringBuffer();
outBuffer.append("UnivariateImpl:\n");
outBuffer.append("n: " + n + "\n");
outBuffer.append("min: " + min + "\n");
outBuffer.append("max: " + max + "\n");
outBuffer.append("mean: " + getMean() + "\n");
outBuffer.append("std dev: " + getStandardDeviation() + "\n");
outBuffer.append("skewness: " + getSkewness() + "\n");
outBuffer.append("kurtosis: " + getKurtosis() + "\n");
return outBuffer.toString();
}
}
/**
* Resets all sums, product, mean, and variance to 0; resets min and max.
*/
public void clear() {
this.sum = this.sumsq = this.sumCube = this.sumQuad = 0.0;
this.n = 0;
this.min = Double.MAX_VALUE;
this.max = Double.MIN_VALUE;
this.product = Double.NaN;
this.mean = Double.NaN ;
this.variance = this.pre_variance = Double.NaN ;
}
/**
* @see org.apache.commons.math.stat.Univariate#addValue(double)
*/
public void addValue(double v) {
/* (non-Javadoc)
* @see org.apache.commons.math.Univariate#getWindowSize()
*/
public int getWindowSize() {
return windowSize;
}
if (windowSize != Univariate.INFINITE_WINDOW) {
/* then all getters deligate to AbstractStoreUnivariate
* and this clause simply adds/rolls a value in the storage array
*/
if (windowSize == n) {
doubleArray.addElementRolling(v);
} else {
n++;
doubleArray.addElement(v);
}
/* (non-Javadoc)
* @see org.apache.commons.math.Univariate#setWindowSize(int)
*/
public void setWindowSize(int windowSize) {
String msg = "A fixed window size must be set via the " +
"UnivariateImpl constructor";
throw new RuntimeException( msg );
}
}
} else {
/* If the windowSize is infinite don't store any values and there
* is no need to discard the influence of any single item.
*/
n++;
if (n <= 1) {
/* if n <= 1, initialize the product, min, max, mean, variance and pre-variance */
product = 1.0;
sum = min = max = mean = v;
sumsq = Math.pow(v, 2);
sumCube = Math.pow(v, 3);
sumQuad = Math.pow(v, 4);
variance = pre_variance = 0.0;
} else {
/* otherwise calc these values */
product *= v;
sum += v;
sumsq += Math.pow(v, 2);
sumCube += Math.pow(v, 3);
sumQuad += Math.pow(v, 4);
min = Math.min(min, v);
max = Math.max(max, v);
double deviationFromMean = v - mean;
double deviationFromMean_overN = deviationFromMean / n;
mean += deviationFromMean_overN;
pre_variance += (n - 1)
* deviationFromMean
* deviationFromMean_overN;
variance = pre_variance / (n - 1);
}
}
}
/**
 * Generates a text report displaying univariate statistics from the
 * values that have been added.
 * <p>
 * Every statistic is obtained through its getter rather than read from
 * the running fields. With a fixed window, addValue only stores the value
 * and leaves the running min/max fields untouched, so reading the fields
 * directly would report NaN instead of the window's actual extremes.
 *
 * @return String with line feeds displaying statistics
 */
public String toString() {
    StringBuffer outBuffer = new StringBuffer();
    outBuffer.append("UnivariateImpl:\n");
    outBuffer.append("n: ").append(getN()).append("\n");
    outBuffer.append("min: ").append(getMin()).append("\n");
    outBuffer.append("max: ").append(getMax()).append("\n");
    outBuffer.append("mean: ").append(getMean()).append("\n");
    outBuffer.append("std dev: ").append(getStandardDeviation()).append("\n");
    outBuffer.append("skewness: ").append(getSkewness()).append("\n");
    outBuffer.append("kurtosis: ").append(getKurtosis()).append("\n");
    return outBuffer.toString();
}
/**
 * Resets the value count to zero and every running statistic (min, max,
 * product, mean, variance, and the sums of the 1st through 4th powers)
 * to NaN. If a storage array is in use, it is replaced by a fresh
 * array sized to the current window.
 */
public void clear() {
    this.n = 0;
    this.min = this.max = Double.NaN;
    this.product = this.mean = Double.NaN;
    this.variance = this.pre_variance = Double.NaN;
    // The power sums must be reset as well; otherwise getSum(), getSumsq()
    // and friends keep reporting the totals accumulated before the clear.
    // addValue re-initialises all of them when the first value arrives.
    this.sum = this.sumsq = this.sumCube = this.sumQuad = Double.NaN;
    if (doubleArray != null) {
        doubleArray = new FixedDoubleArray(windowSize);
    }
}
/**
 * Returns the window size currently configured for this instance.
 *
 * @return the current window size
 * @see Univariate#getWindowSize()
 */
public int getWindowSize() {
    return this.windowSize;
}
/**
 * Changes the window size. All accumulated statistics are discarded
 * first, then the storage array is set up to match the new size.
 *
 * @param windowSize the new window size;
 *        {@code Univariate.INFINITE_WINDOW} turns storage off
 * @see Univariate#setWindowSize(int)
 */
public void setWindowSize(int windowSize) {
    clear();
    this.windowSize = windowSize;
    if (windowSize == Univariate.INFINITE_WINDOW) {
        // An infinite window keeps only running statistics, so no storage
        // array is needed; allocating one sized by the sentinel is wrong.
        doubleArray = null;
    } else {
        doubleArray = new FixedDoubleArray(windowSize);
    }
}
/**
 * Returns the elements held by the storage array.
 *
 * @return the result of the storage array's getElements()
 * @throws RuntimeException if the window size is infinite
 * @see org.apache.commons.math.stat.StoreUnivariate#getValues()
 */
public double[] getValues() {
    if (windowSize != Univariate.INFINITE_WINDOW) {
        return this.doubleArray.getElements();
    }

    throw new RuntimeException("Values are only available if windowSize is fixed");
}
/**
 * Returns the element at the given index of the storage array.
 *
 * @param index position passed through to the storage array
 * @return the result of the storage array's getElement(index)
 * @throws RuntimeException if the window size is infinite
 * @see org.apache.commons.math.stat.StoreUnivariate#getElement(int)
 */
public double getElement(int index) {
    if (windowSize != Univariate.INFINITE_WINDOW) {
        return this.doubleArray.getElement(index);
    }

    throw new RuntimeException("Elements are only available if windowSize is fixed");
}
/**
 * Returns the sorted values by delegating to the superclass
 * implementation.
 *
 * @return the result of super.getSortedValues()
 * @throws RuntimeException if the window size is infinite
 * @see org.apache.commons.math.stat.StoreUnivariate#getSortedValues()
 */
public double[] getSortedValues() {
    if (windowSize != Univariate.INFINITE_WINDOW) {
        return super.getSortedValues();
    }

    throw new RuntimeException("SortedValues are only available if windowSize is fixed");
}
}