From Phil Steitz patch submission for Issue #20175
The attached patch includes the following improvements to Univariate and UnivariateImpl: * Improved efficiency of min, max and product maintenance when windowSize is limited by incorporating suggestion posted to commons-dev by Brent Worden (added author credit). Thanks, Brent! * Added javadoc specifying NaN contracts for all statistics, definitions for geometric and arithmetic means. * Made some slight modifications to UnivariateImpl to make it consistent with NaN contracts * All interface documentation moved to Univariate. The interface specification includes the NaN semantics and a first attempt at clearly defining exactly what "rolling" means and how this affects what statistics are defined when. * Added test cases to verify that min, max, product are correctly maintained when "rolling" and to verify that NaN contracts are satisfied. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140857 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5918a1fe1e
commit
b84e61ffcf
|
@ -54,8 +54,8 @@ The Math project is a library of lightweight, self-contained mathematics and sta
|
|||
</dependency> -->
|
||||
</dependencies>
|
||||
|
||||
<!-- <issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&email1=&emailtype1=substring&emailassigned_to1=1&email2=&emailtype2=substring&emailreporter2=1&bugidtype=include&bug_id=&changedin=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&product=Commons&component=Sandbox&short_desc=&short_desc_type=allwordssubstr&long_desc=&long_desc_type=allwordssubstr&bug_file_loc=&bug_file_loc_type=allwordssubstr&keywords=&keywords_type=anywords&field0-0-0=noop&type0-0-0=noop&value0-0-0=&cmdtype=doit&order=%27Importance%27</issueTrackingUrl>
|
||||
-->
|
||||
<issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&email1=&emailtype1=substring&emailassigned_to1=1&email2=&emailtype2=substring&emailreporter2=1&bugidtype=include&bug_id=&changedin=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&product=Commons&component=Sandbox&short_desc=&short_desc_type=allwordssubstr&long_desc=&long_desc_type=allwordssubstr&bug_file_loc=&bug_file_loc_type=allwordssubstr&keywords=&keywords_type=anywords&field0-0-0=noop&type0-0-0=noop&value0-0-0=&cmdtype=doit&order=%27Importance%27</issueTrackingUrl>
|
||||
|
||||
<repository>
|
||||
<connection>scm:cvs:pserver:anoncvs@cvs.apache.org:/home/cvspublic:jakarta-commons-sandbox/${pom.artifactId.substring(8)}</connection>
|
||||
<url>http://cvs.apache.org/viewcvs/jakarta-commons-sandbox/${pom.artifactId.substring(8)}/</url>
|
||||
|
|
|
@ -56,13 +56,24 @@
|
|||
/**
|
||||
*
|
||||
* Accumulates univariate statistics for values fed in
|
||||
* through the addValue() method. This interface defines the LCD interface
|
||||
* which all Univariate implementations must implement.
|
||||
* through the addValue() method. This interface defines the LCD interface
|
||||
* which all Univariate implementations must implement. <p>
|
||||
* A "rolling" capability is supported by all implementations with the following
|
||||
* contract: <p>
|
||||
* <i> Setting the windowSize property limits the domain of all statistics to
|
||||
* the last <code>windowSize</code> values added.</i><p>
|
||||
* We use the term <i>available values</i> throughout the API documentation
|
||||
* to refer to these values when the windowSize is set. For example, if the
|
||||
* windowSize is set to 3 and the values {1,2,3,4,5} have been added <strong>
|
||||
* in that order</strong> then the <i>available values</i> are {3,4,5} and all
|
||||
* reported statistics will be based on these values<p>
|
||||
* The default windowSize is "infinite" -- i.e., all values added are included
|
||||
* in all computations.
|
||||
*
|
||||
* @author Phil Steitz
|
||||
* @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
|
||||
* @author Mark Diggory
|
||||
* @version $Revision: 1.5 $ $Date: 2003/05/21 17:59:19 $
|
||||
* @version $Revision: 1.6 $ $Date: 2003/05/23 17:33:18 $
|
||||
*
|
||||
*/
|
||||
public interface Univariate {
|
||||
|
@ -71,65 +82,96 @@ public interface Univariate {
|
|||
* Adds the value to the set of numbers
|
||||
* @param v the value to be added
|
||||
*/
|
||||
public abstract void addValue(double v);
|
||||
abstract void addValue(double v);
|
||||
|
||||
/**
|
||||
* Returns the mean of the values that have been added
|
||||
* Returns the <a href=http://www.xycoon.com/arithmetic_mean.htm>
|
||||
* arithmetic mean </a> of the available values <p>
|
||||
*
|
||||
* Will return Double.NaN if no values have been added when
|
||||
* this method is invoked.
|
||||
*
|
||||
* @return mean value
|
||||
*/
|
||||
public abstract double getMean();
|
||||
abstract double getMean();
|
||||
|
||||
/**
|
||||
* Returns the geometric mean of the values that have been added
|
||||
* Returns the <a href=http://www.xycoon.com/geometric_mean.htm>
|
||||
* geometric mean </a> of the available values <p>
|
||||
*
|
||||
* Will return Double.NaN if no values have been added or the product
|
||||
* of the available values is less than or equal to 0.
|
||||
*
|
||||
* @return mean value
|
||||
*/
|
||||
public abstract double getGeometricMean();
|
||||
abstract double getGeometricMean();
|
||||
|
||||
/**
|
||||
* Returns the product of all values that have been added
|
||||
* Returns the product of the available values <p>
|
||||
* Will return Double.NaN if no values have been added.
|
||||
*
|
||||
* @return product of all values
|
||||
*/
|
||||
public abstract double getProduct();
|
||||
abstract double getProduct();
|
||||
|
||||
/**
|
||||
* Returns the variance of the values that have been added
|
||||
* @return variance value
|
||||
* Returns the variance of the available values. <p>
|
||||
* Double.NaN is returned for an empty set of values and 0.0 is
|
||||
* returned for a single value set.
|
||||
*
|
||||
* @return The variance of a set of values.
|
||||
*/
|
||||
public abstract double getVariance();
|
||||
abstract double getVariance();
|
||||
|
||||
/**
|
||||
* Returns the standard deviation of the values that have been added
|
||||
* Returns the standard deviation of the available values. <p>
|
||||
* Double.NaN is returned for an empty set of values and 0.0 is
|
||||
* returned for a single value set.
|
||||
*
|
||||
* @return standard deviation value
|
||||
*/
|
||||
public abstract double getStandardDeviation();
|
||||
abstract double getStandardDeviation();
|
||||
|
||||
/** Getter for property max.
|
||||
/**
|
||||
* Returns the maximum of the available values <p>
|
||||
* Double.NaN is returned if no values have been added
|
||||
*
|
||||
* @return Value of property max.
|
||||
*/
|
||||
public abstract double getMax();
|
||||
abstract double getMax();
|
||||
|
||||
/** Getter for property min.
|
||||
/**
|
||||
* Returns the minimum of the available values <p>
|
||||
* Double.NaN is returned if no values have been added
|
||||
*
|
||||
* @return Value of property min.
|
||||
*/
|
||||
public abstract double getMin();
|
||||
abstract double getMin();
|
||||
|
||||
/** Getter for property n.
|
||||
* @return Value of property n.
|
||||
/**
|
||||
* Returns the number of available values
|
||||
* @return the number of available values
|
||||
*/
|
||||
public abstract int getN();
|
||||
abstract int getN();
|
||||
|
||||
/** Getter for property sum.
|
||||
* @return Value of property sum.
|
||||
/**
|
||||
* Returns the sum of the available values <p>
|
||||
* Returns 0 if no values have been added.
|
||||
*
|
||||
* @return the sum of the available values
|
||||
*/
|
||||
public abstract double getSum();
|
||||
abstract double getSum();
|
||||
|
||||
/** Getter for property sumsq.
|
||||
* @return Value of property sumsq.
|
||||
/**
|
||||
* Returns the sum of the squares of the available values.
|
||||
* Returns 0 if no values have been added.
|
||||
*
|
||||
* @return the sum of the squares of the available values.
|
||||
*/
|
||||
public abstract double getSumsq();
|
||||
abstract double getSumsq();
|
||||
|
||||
/** Resets all sums to 0, resets min and max */
|
||||
public abstract void clear();
|
||||
/** Resets all statistics */
|
||||
abstract void clear();
|
||||
|
||||
/**
|
||||
* This constant signals that a Univariate implementation
|
||||
|
@ -137,13 +179,13 @@ public interface Univariate {
|
|||
* elements. In other words, if getWindow returns this
|
||||
* constant, there is, in effect, no "window".
|
||||
*/
|
||||
public static final int INIFINTE_WINDOW = -1;
|
||||
static final int INIFINTE_WINDOW = -1;
|
||||
|
||||
/**
|
||||
* Univariate has the ability to return only measures for the
|
||||
* last N elements added to the set of values. This function returns
|
||||
*/
|
||||
public abstract int getWindowSize();
|
||||
abstract int getWindowSize();
|
||||
|
||||
/**
|
||||
* Sets the window. windowSize controls the number of value
|
||||
|
@ -151,5 +193,5 @@ public interface Univariate {
|
|||
* For example, a window value of 10 means that getMean()
|
||||
* will return the mean of the last 10 values added.
|
||||
*/
|
||||
public abstract void setWindowSize(int windowSize);
|
||||
abstract void setWindowSize(int windowSize);
|
||||
}
|
||||
|
|
|
@ -59,14 +59,15 @@ import java.io.Serializable;
|
|||
*
|
||||
* Accumulates univariate statistics for values fed in
|
||||
* through the addValue() method. Does not store raw data values.
|
||||
* All data (including n) are represented internally as doubles.
|
||||
* All data are represented internally as doubles.
|
||||
* Integers, floats and longs can be added, but will be converted
|
||||
* to doubles by addValue().
|
||||
*
|
||||
* @author Phil Steitz
|
||||
* @author Mark Diggory
|
||||
* @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
|
||||
* @version $Revision: 1.6 $ $Date: 2003/05/21 17:59:19 $
|
||||
* @author Mark Diggory
|
||||
* @author Brent Worden
|
||||
* @version $Revision: 1.7 $ $Date: 2003/05/23 17:33:18 $
|
||||
*
|
||||
*/
|
||||
public class UnivariateImpl implements Univariate, Serializable {
|
||||
|
@ -94,7 +95,7 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
/** max of values that have been added */
|
||||
private double max = Double.MIN_VALUE;
|
||||
|
||||
/** produce of values that have been added */
|
||||
/** product of values that have been added */
|
||||
private double product = Double.NaN;
|
||||
|
||||
/** Creates new univariate */
|
||||
|
@ -108,45 +109,36 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
doubleArray = new FixedDoubleArray( window );
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the value, updating running sums.
|
||||
* @param v the value to be added
|
||||
*/
|
||||
|
||||
public void addValue(double v) {
|
||||
|
||||
insertValue(v);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the mean of the values that have been added
|
||||
* @return mean value
|
||||
*/
|
||||
|
||||
public double getMean() {
|
||||
// FIXME: throw something meaningful if n = 0
|
||||
return (sum / (double) n );
|
||||
if (n == 0) {
|
||||
return Double.NaN;
|
||||
} else {
|
||||
return (sum / (double) n );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the geometric mean of the values that have been added
|
||||
* @return geometric mean value
|
||||
*/
|
||||
|
||||
public double getGeometricMean() {
|
||||
return Math.pow(product,( 1.0/n ) );
|
||||
if ((product <= 0.0) || (n == 0)) {
|
||||
return Double.NaN;
|
||||
} else {
|
||||
return Math.pow(product,( 1.0/(double)n ) );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the product of all values add to this Univariate
|
||||
* @return product value
|
||||
*/
|
||||
|
||||
public double getProduct() {
|
||||
return product;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the variance of the values that have been added.
|
||||
* @return The variance of a set of values. Double.NaN is returned for
|
||||
* an empty set of values and 0.0 is returned for a single value set.
|
||||
*/
|
||||
|
||||
public double getVariance() {
|
||||
double variance = Double.NaN;
|
||||
|
||||
|
@ -160,21 +152,16 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
return variance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard deviation of the values that have been added
|
||||
* @return The standard deviation of a set of values. Double.NaN is
|
||||
* returned for an empty set of values and 0.0 is returned for
|
||||
* a single value set.
|
||||
*/
|
||||
|
||||
public double getStandardDeviation() {
|
||||
return (new Double(Math.sqrt
|
||||
((new Double(getVariance())).doubleValue()))).doubleValue();
|
||||
double variance = getVariance();
|
||||
if ((variance == 0.0) || (variance == Double.NaN)) {
|
||||
return variance;
|
||||
} else {
|
||||
return Math.sqrt(variance);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the value, updating running sums.
|
||||
* @param v the value to be added
|
||||
*/
|
||||
|
||||
private void insertValue(double v) {
|
||||
|
||||
// The default value of product is NaN, if you
|
||||
|
@ -197,25 +184,28 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
sum -= discarded;
|
||||
sumsq -= discarded * discarded;
|
||||
|
||||
// Include the influence of the new
|
||||
// TODO: The next two lines seems rather expensive, but
|
||||
// I don't see many alternatives.
|
||||
min = doubleArray.getMin();
|
||||
max = doubleArray.getMax();
|
||||
if(discarded == min) {
|
||||
min = doubleArray.getMin();
|
||||
} else {
|
||||
if(discarded == max){
|
||||
max = doubleArray.getMax();
|
||||
}
|
||||
}
|
||||
|
||||
sum += v;
|
||||
sumsq += v*v;
|
||||
|
||||
// Note that the product CANNOT be discarded
|
||||
// properly because one cannot discount the effect
|
||||
// of a zero value. For this reason, the product
|
||||
// of the altered array must be calculated from the
|
||||
// current array elements. Product must be recalculated
|
||||
// everytime the array is "rolled"
|
||||
product = 1.0;
|
||||
double[] elements = doubleArray.getElements();
|
||||
for( int i = 0; i < elements.length; i++ ) {
|
||||
if(product != 0.0){
|
||||
// can safely remove discarded value
|
||||
product *= v/discarded;
|
||||
} else if(discarded == 0.0){
|
||||
// need to recompute product
|
||||
product = 1.0;
|
||||
double[] elements = doubleArray.getElements();
|
||||
for( int i = 0; i < elements.length; i++ ) {
|
||||
product *= elements[i];
|
||||
}
|
||||
}
|
||||
} // else product = 0 and will still be 0 after discard
|
||||
|
||||
} else {
|
||||
doubleArray.addElement( v );
|
||||
|
@ -243,21 +233,22 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
* @return Value of property max.
|
||||
*/
|
||||
public double getMax() {
|
||||
return max;
|
||||
}
|
||||
|
||||
/** Setter for property max.
|
||||
* @param max New value of property max.
|
||||
*/
|
||||
public void setMax(double max) {
|
||||
this.max = max;
|
||||
if (n == 0) {
|
||||
return Double.NaN;
|
||||
} else {
|
||||
return max;
|
||||
}
|
||||
}
|
||||
|
||||
/** Getter for property min.
|
||||
* @return Value of property min.
|
||||
*/
|
||||
public double getMin() {
|
||||
return min;
|
||||
if (n == 0) {
|
||||
return Double.NaN;
|
||||
} else {
|
||||
return min;
|
||||
}
|
||||
}
|
||||
|
||||
/** Getter for property n.
|
||||
|
@ -305,6 +296,7 @@ public class UnivariateImpl implements Univariate, Serializable {
|
|||
this.n = 0;
|
||||
this.min = Double.MAX_VALUE;
|
||||
this.max = Double.MIN_VALUE;
|
||||
this.product = Double.NaN;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
|
|
@ -60,8 +60,9 @@ import junit.framework.TestSuite;
|
|||
/**
|
||||
* Test cases for the {@link Univariate} class.
|
||||
*
|
||||
* @author <a href="mailto:phil@steitz.com">Phil Steitz</a>
|
||||
* @version $Revision: 1.2 $ $Date: 2003/05/21 17:59:20 $
|
||||
* @author Phil Steitz
|
||||
* @author Tim O'Brien
|
||||
* @version $Revision: 1.3 $ $Date: 2003/05/23 17:33:18 $
|
||||
*/
|
||||
|
||||
public final class UnivariateImplTest extends TestCase {
|
||||
|
@ -114,16 +115,21 @@ public final class UnivariateImplTest extends TestCase {
|
|||
|
||||
public void testN0andN1Conditions() throws Exception {
|
||||
UnivariateImpl u = new UnivariateImpl();
|
||||
|
||||
assertTrue("Mean of n = 0 set should be NaN", Double.isNaN( u.getMean() ) );
|
||||
assertTrue("Standard Deviation of n = 0 set should be NaN", Double.isNaN( u.getStandardDeviation() ) );
|
||||
assertTrue("Variance of n = 0 set should be NaN", Double.isNaN(u.getVariance() ) );
|
||||
assertTrue("Mean of n = 0 set should be NaN",
|
||||
Double.isNaN( u.getMean() ) );
|
||||
assertTrue("Standard Deviation of n = 0 set should be NaN",
|
||||
Double.isNaN( u.getStandardDeviation() ) );
|
||||
assertTrue("Variance of n = 0 set should be NaN",
|
||||
Double.isNaN(u.getVariance() ) );
|
||||
|
||||
u.addValue(one);
|
||||
|
||||
u.addValue(one);
|
||||
|
||||
assertTrue( "Mean of n = 1 set should be value of single item n1", u.getMean() == one);
|
||||
assertTrue( "Mean of n = 1 set should be zero", u.getStandardDeviation() == 0);
|
||||
assertTrue( "Variance of n = 1 set should be zero", u.getVariance() == 0);
|
||||
assertTrue( "Mean of n = 1 set should be value of single item n1",
|
||||
u.getMean() == one);
|
||||
assertTrue( "Mean of n = 1 set should be zero",
|
||||
u.getStandardDeviation() == 0);
|
||||
assertTrue( "Variance of n = 1 set should be zero",
|
||||
u.getVariance() == 0);
|
||||
}
|
||||
|
||||
public void testProductAndGeometricMean() throws Exception {
|
||||
|
@ -134,8 +140,10 @@ public final class UnivariateImplTest extends TestCase {
|
|||
u.addValue( 3.0 );
|
||||
u.addValue( 4.0 );
|
||||
|
||||
assertEquals( "Product not expected", 24.0, u.getProduct(), Double.MIN_VALUE );
|
||||
assertEquals( "Geometric mean not expected", 2.213364, u.getGeometricMean(), 0.00001 );
|
||||
assertEquals( "Product not expected", 24.0, u.getProduct(),
|
||||
Double.MIN_VALUE );
|
||||
assertEquals( "Geometric mean not expected", 2.213364,
|
||||
u.getGeometricMean(), 0.00001 );
|
||||
|
||||
// Now test rolling - UnivariateImpl should discount the contribution
|
||||
// of a discarded element
|
||||
|
@ -144,11 +152,56 @@ public final class UnivariateImplTest extends TestCase {
|
|||
}
|
||||
// Values should be (2,3,4,5,6,7,8,9,10,11)
|
||||
|
||||
assertEquals( "Product not expected", 39916800.0, u.getProduct(), 0.00001 );
|
||||
assertEquals( "Geometric mean not expected", 5.755931, u.getGeometricMean(), 0.00001 );
|
||||
|
||||
|
||||
assertEquals( "Product not expected", 39916800.0,
|
||||
u.getProduct(), 0.00001 );
|
||||
assertEquals( "Geometric mean not expected", 5.755931,
|
||||
u.getGeometricMean(), 0.00001 );
|
||||
}
|
||||
|
||||
public void testRollingMinMax() {
|
||||
UnivariateImpl u = new UnivariateImpl(3);
|
||||
u.addValue( 1.0 );
|
||||
u.addValue( 5.0 );
|
||||
u.addValue( 3.0 );
|
||||
u.addValue( 4.0 ); // discarding min
|
||||
assertEquals( "min not expected", 3.0,
|
||||
u.getMin(), Double.MIN_VALUE);
|
||||
u.addValue(1.0); // discarding max
|
||||
assertEquals( "max not expected", 4.0,
|
||||
u.getMax(), Double.MIN_VALUE);
|
||||
}
|
||||
|
||||
public void testNaNContracts() {
|
||||
UnivariateImpl u = new UnivariateImpl();
|
||||
double nan = Double.NaN;
|
||||
assertTrue("mean not NaN",Double.isNaN(u.getMean()));
|
||||
assertTrue("min not NaN",Double.isNaN(u.getMin()));
|
||||
assertTrue("std dev not NaN",Double.isNaN(u.getStandardDeviation()));
|
||||
assertTrue("var not NaN",Double.isNaN(u.getVariance()));
|
||||
assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
|
||||
|
||||
u.addValue(1.0);
|
||||
|
||||
assertEquals( "mean not expected", 1.0,
|
||||
u.getMean(), Double.MIN_VALUE);
|
||||
assertEquals( "variance not expected", 0.0,
|
||||
u.getVariance(), Double.MIN_VALUE);
|
||||
assertEquals( "geometric mean not expected", 1.0,
|
||||
u.getGeometricMean(), Double.MIN_VALUE);
|
||||
|
||||
u.addValue(-1.0);
|
||||
|
||||
assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
|
||||
|
||||
u.addValue(0.0);
|
||||
|
||||
assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));
|
||||
|
||||
//FIXME: test all other NaN contract specs
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue