From Phil Steitz patch submission for Issue #20175

The attached patch includes the following improvements to Univariate and
UnivariateImpl:

* Improved efficiency of min, max and product maintenance when windowSize is
  limited by incorporating a suggestion posted to commons-dev by Brent Worden
  (added author credit).  Thanks, Brent!

* Added javadoc specifying NaN contracts for all statistics, definitions for
  geometric and arithmetic means.

* Made some slight modifications to UnivariateImpl to make it consistent with
  NaN contracts

* All interface documentation moved to Univariate. The interface specification
  includes the NaN semantics and a first attempt at clearly defining exactly
  what "rolling" means and how this affects what statistics are defined when.

* Added test cases to verify that min, max, product are correctly maintained
  when "rolling" and to verify that NaN contracts are satisfied.


git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140857 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim O'Brien 2003-05-23 17:33:18 +00:00
parent 5918a1fe1e
commit b84e61ffcf
4 changed files with 204 additions and 117 deletions

View File

@ -54,8 +54,8 @@ The Math project is a library of lightweight, self-contained mathematics and sta
</dependency> -->
</dependencies>
<!-- <issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Commons&amp;component=Sandbox&amp;short_desc=&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27</issueTrackingUrl>
-->
<issueTrackingUrl>http://nagoya.apache.org/bugzilla/buglist.cgi?bug_status=NEW&amp;bug_status=ASSIGNED&amp;bug_status=REOPENED&amp;email1=&amp;emailtype1=substring&amp;emailassigned_to1=1&amp;email2=&amp;emailtype2=substring&amp;emailreporter2=1&amp;bugidtype=include&amp;bug_id=&amp;changedin=&amp;votes=&amp;chfieldfrom=&amp;chfieldto=Now&amp;chfieldvalue=&amp;product=Commons&amp;component=Sandbox&amp;short_desc=&amp;short_desc_type=allwordssubstr&amp;long_desc=&amp;long_desc_type=allwordssubstr&amp;bug_file_loc=&amp;bug_file_loc_type=allwordssubstr&amp;keywords=&amp;keywords_type=anywords&amp;field0-0-0=noop&amp;type0-0-0=noop&amp;value0-0-0=&amp;cmdtype=doit&amp;order=%27Importance%27</issueTrackingUrl>
<repository>
<connection>scm:cvs:pserver:anoncvs@cvs.apache.org:/home/cvspublic:jakarta-commons-sandbox/${pom.artifactId.substring(8)}</connection>
<url>http://cvs.apache.org/viewcvs/jakarta-commons-sandbox/${pom.artifactId.substring(8)}/</url>

View File

@ -56,13 +56,24 @@
/**
*
* Accumulates univariate statistics for values fed in
* through the addValue() method. This interface defines the LCD interface
* which all Univariate implementations must implement.
* through the addValue() method. This interface defines the LCD interface
* which all Univariate implementations must implement. <p>
* A "rolling" capability is supported by all implementations with the following
* contract: <p>
* <i> Setting the windowSize property limits the domain of all statistics to
* the last <code>windowSize</code> values added.</i><p>
* We use the term <i>available values</i> throughout the API documentation
* to refer to these values when the windowSize is set. For example, if the
* windowSize is set to 3 and the values {1,2,3,4,5} have been added <strong>
* in that order</strong> then the <i>available values</i> are {3,4,5} and all
* reported statistics will be based on these values<p>
* The default windowSize is "infinite" -- i.e., all values added are included
* in all computations.
*
* @author Phil Steitz
* @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
* @author Mark Diggory
* @version $Revision: 1.5 $ $Date: 2003/05/21 17:59:19 $
* @version $Revision: 1.6 $ $Date: 2003/05/23 17:33:18 $
*
*/
public interface Univariate {
@ -71,65 +82,96 @@ public interface Univariate {
* Adds the value to the set of numbers
* @param v the value to be added
*/
public abstract void addValue(double v);
abstract void addValue(double v);
/**
* Returns the mean of the values that have been added
* Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
* arithmetic mean </a> of the available values <p>
*
* Will return Double.NaN if no values have been added when
* this method is invoked.
*
* @return mean value
*/
public abstract double getMean();
abstract double getMean();
/**
* Returns the geometric mean of the values that have been added
* Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
* geometric mean </a> of the available values <p>
*
* Will return Double.NaN if no values have been added or the product
* of the available values is less than or equal to 0.
*
* @return mean value
*/
public abstract double getGeometricMean();
abstract double getGeometricMean();
/**
* Returns the product of all values that have been added
* Returns the product of the available values <p>
* Will return Double.NaN if no values have been added.
*
* @return product of all values
*/
public abstract double getProduct();
abstract double getProduct();
/**
* Returns the variance of the values that have been added
* @return variance value
* Returns the variance of the available values. <p>
* Double.NaN is returned for an empty set of values and 0.0 is
* returned for a single value set.
*
* @return The variance of a set of values.
*/
public abstract double getVariance();
abstract double getVariance();
/**
* Returns the standard deviation of the values that have been added
* Returns the standard deviation of the available values. <p>
* Double.NaN is returned for an empty set of values and 0.0 is
* returned for a single value set.
*
* @return standard deviation value
*/
public abstract double getStandardDeviation();
abstract double getStandardDeviation();
/** Getter for property max.
/**
* Returns the maximum of the available values <p>
* Double.NaN is returned if no values have been added
*
* @return Value of property max.
*/
public abstract double getMax();
abstract double getMax();
/** Getter for property min.
/**
* Returns the minimum of the available values <p>
* Double.NaN is returned if no values have been added
*
* @return Value of property min.
*/
public abstract double getMin();
abstract double getMin();
/** Getter for property n.
* @return Value of property n.
/**
* Returns the number of available values
* @return the number of available values
*/
public abstract int getN();
abstract int getN();
/** Getter for property sum.
* @return Value of property sum.
/**
* Returns the sum of the available values <p>
* Returns 0 if no values have been added.
*
* @return the sum of the available values
*/
public abstract double getSum();
abstract double getSum();
/** Getter for property sumsq.
* @return Value of property sumsq.
/**
* Returns the sum of the squares of the available values.
* Returns 0 if no values have been added.
*
* @return the sum of the squares of the available values.
*/
public abstract double getSumsq();
abstract double getSumsq();
/** Resets all sums to 0, resets min and max */
public abstract void clear();
/** Resets all statistics */
abstract void clear();
/**
* This constant signals that a Univariate implementation
@ -137,13 +179,13 @@ public interface Univariate {
* elements. In other words, if getWindow returns this
* constant, there is, in effect, no "window".
*/
public static final int INIFINTE_WINDOW = -1;
static final int INIFINTE_WINDOW = -1;
/**
* Univariate has the ability to return only measures for the
* last N elements added to the set of values. This function returns
* the current window size.
*/
public abstract int getWindowSize();
abstract int getWindowSize();
/**
* Sets the window. windowSize controls the number of value
@ -151,5 +193,5 @@ public interface Univariate {
* For example, a window value of 10 means that getMean()
* will return the mean of the last 10 values added.
*/
public abstract void setWindowSize(int windowSize);
abstract void setWindowSize(int windowSize);
}

View File

@ -59,14 +59,15 @@ import java.io.Serializable;
*
* Accumulates univariate statistics for values fed in
* through the addValue() method. Does not store raw data values.
* All data (including n) are represented internally as doubles.
* All data are represented internally as doubles.
* Integers, floats and longs can be added, but will be converted
* to doubles by addValue().
*
* @author Phil Steitz
* @author Mark Diggory
* @author <a href="mailto:tobrien@apache.org">Tim O'Brien</a>
* @version $Revision: 1.6 $ $Date: 2003/05/21 17:59:19 $
* @author Mark Diggory
* @author Brent Worden
* @version $Revision: 1.7 $ $Date: 2003/05/23 17:33:18 $
*
*/
public class UnivariateImpl implements Univariate, Serializable {
@ -94,7 +95,7 @@ public class UnivariateImpl implements Univariate, Serializable {
/**
 * max of values that have been added
 * NOTE(review): Double.MIN_VALUE is the smallest positive double, not the
 * most negative one. If max is updated with a plain (v > max) comparison,
 * a data set containing only negative values would never replace this
 * seed -- confirm against the update logic and consider
 * Double.NEGATIVE_INFINITY.
 */
private double max = Double.MIN_VALUE;
/** produce of values that have been added */
/** product of values that have been added */
private double product = Double.NaN;
/** Creates new univariate */
@ -108,45 +109,36 @@ public class UnivariateImpl implements Univariate, Serializable {
doubleArray = new FixedDoubleArray( window );
}
/**
* Adds the value, updating running sums.
* @param v the value to be added
*/
public void addValue(double v) {
insertValue(v);
}
/**
* Returns the mean of the values that have been added
* @return mean value
*/
public double getMean() {
// FIXME: throw something meaningful if n = 0
return (sum / (double) n );
if (n == 0) {
return Double.NaN;
} else {
return (sum / (double) n );
}
}
/**
* Returns the geometric mean of the values that have been added
* @return geometric mean value
*/
public double getGeometricMean() {
return Math.pow(product,( 1.0/n ) );
if ((product <= 0.0) || (n == 0)) {
return Double.NaN;
} else {
return Math.pow(product,( 1.0/(double)n ) );
}
}
/**
* Returns the product of all values add to this Univariate
* @return product value
*/
public double getProduct() {
return product;
}
/**
* Returns the variance of the values that have been added.
* @return The variance of a set of values. Double.NaN is returned for
* an empty set of values and 0.0 is returned for a single value set.
*/
public double getVariance() {
double variance = Double.NaN;
@ -160,21 +152,16 @@ public class UnivariateImpl implements Univariate, Serializable {
return variance;
}
/**
 * Returns the standard deviation of the values that have been added.
 *
 * @return The standard deviation of a set of values. Double.NaN is
 *         returned for an empty set of values and 0.0 is returned for
 *         a single value set.
 */
public double getStandardDeviation() {
    double variance = getVariance();
    // NaN never compares equal to anything (itself included), so the
    // guard must use Double.isNaN; "variance == Double.NaN" is always false.
    if ((variance == 0.0) || Double.isNaN(variance)) {
        return variance;
    }
    return Math.sqrt(variance);
}
/**
* Adds the value, updating running sums.
* @param v the value to be added
*/
private void insertValue(double v) {
// The default value of product is NaN, if you
@ -197,25 +184,28 @@ public class UnivariateImpl implements Univariate, Serializable {
sum -= discarded;
sumsq -= discarded * discarded;
// Include the influence of the new
// TODO: The next two lines seems rather expensive, but
// I don't see many alternatives.
min = doubleArray.getMin();
max = doubleArray.getMax();
if(discarded == min) {
min = doubleArray.getMin();
} else {
if(discarded == max){
max = doubleArray.getMax();
}
}
sum += v;
sumsq += v*v;
// Note that the product CANNOT be discarded
// properly because one cannot discount the effect
// of a zero value. For this reason, the product
// of the altered array must be calculated from the
// current array elements. Product must be recalculated
// everytime the array is "rolled"
product = 1.0;
double[] elements = doubleArray.getElements();
for( int i = 0; i < elements.length; i++ ) {
if(product != 0.0){
// can safely remove discarded value
product *= v/discarded;
} else if(discarded == 0.0){
// need to recompute product
product = 1.0;
double[] elements = doubleArray.getElements();
for( int i = 0; i < elements.length; i++ ) {
product *= elements[i];
}
}
} // else product = 0 and will still be 0 after discard
} else {
doubleArray.addElement( v );
@ -243,21 +233,22 @@ public class UnivariateImpl implements Univariate, Serializable {
* @return Value of property max.
*/
public double getMax() {
return max;
}
/** Setter for property max.
* @param max New value of property max.
*/
public void setMax(double max) {
this.max = max;
if (n == 0) {
return Double.NaN;
} else {
return max;
}
}
/** Getter for property min.
* @return Value of property min.
*/
public double getMin() {
return min;
if (n == 0) {
return Double.NaN;
} else {
return min;
}
}
/** Getter for property n.
@ -305,6 +296,7 @@ public class UnivariateImpl implements Univariate, Serializable {
this.n = 0;
this.min = Double.MAX_VALUE;
this.max = Double.MIN_VALUE;
this.product = Double.NaN;
}
/* (non-Javadoc)

View File

@ -60,8 +60,9 @@ import junit.framework.TestSuite;
/**
* Test cases for the {@link Univariate} class.
*
* @author <a href="mailto:phil@steitz.com">Phil Steitz</a>
* @version $Revision: 1.2 $ $Date: 2003/05/21 17:59:20 $
* @author Phil Steitz
* @author Tim O'Brien
* @version $Revision: 1.3 $ $Date: 2003/05/23 17:33:18 $
*/
public final class UnivariateImplTest extends TestCase {
@ -114,16 +115,21 @@ public final class UnivariateImplTest extends TestCase {
public void testN0andN1Conditions() throws Exception {
UnivariateImpl u = new UnivariateImpl();
assertTrue("Mean of n = 0 set should be NaN", Double.isNaN( u.getMean() ) );
assertTrue("Standard Deviation of n = 0 set should be NaN", Double.isNaN( u.getStandardDeviation() ) );
assertTrue("Variance of n = 0 set should be NaN", Double.isNaN(u.getVariance() ) );
assertTrue("Mean of n = 0 set should be NaN",
Double.isNaN( u.getMean() ) );
assertTrue("Standard Deviation of n = 0 set should be NaN",
Double.isNaN( u.getStandardDeviation() ) );
assertTrue("Variance of n = 0 set should be NaN",
Double.isNaN(u.getVariance() ) );
u.addValue(one);
u.addValue(one);
assertTrue( "Mean of n = 1 set should be value of single item n1", u.getMean() == one);
assertTrue( "Mean of n = 1 set should be zero", u.getStandardDeviation() == 0);
assertTrue( "Variance of n = 1 set should be zero", u.getVariance() == 0);
assertTrue( "Mean of n = 1 set should be value of single item n1",
u.getMean() == one);
assertTrue( "Mean of n = 1 set should be zero",
u.getStandardDeviation() == 0);
assertTrue( "Variance of n = 1 set should be zero",
u.getVariance() == 0);
}
public void testProductAndGeometricMean() throws Exception {
@ -134,8 +140,10 @@ public final class UnivariateImplTest extends TestCase {
u.addValue( 3.0 );
u.addValue( 4.0 );
assertEquals( "Product not expected", 24.0, u.getProduct(), Double.MIN_VALUE );
assertEquals( "Geometric mean not expected", 2.213364, u.getGeometricMean(), 0.00001 );
assertEquals( "Product not expected", 24.0, u.getProduct(),
Double.MIN_VALUE );
assertEquals( "Geometric mean not expected", 2.213364,
u.getGeometricMean(), 0.00001 );
// Now test rolling - UnivariateImpl should discount the contribution
// of a discarded element
@ -144,11 +152,56 @@ public final class UnivariateImplTest extends TestCase {
}
// Values should be (2,3,4,5,6,7,8,9,10,11)
assertEquals( "Product not expected", 39916800.0, u.getProduct(), 0.00001 );
assertEquals( "Geometric mean not expected", 5.755931, u.getGeometricMean(), 0.00001 );
assertEquals( "Product not expected", 39916800.0,
u.getProduct(), 0.00001 );
assertEquals( "Geometric mean not expected", 5.755931,
u.getGeometricMean(), 0.00001 );
}
/**
 * Checks that min and max stay correct while a 3-value window rolls:
 * first the current minimum is pushed out, then the current maximum.
 */
public void testRollingMinMax() {
    final double tolerance = Double.MIN_VALUE;
    UnivariateImpl stats = new UnivariateImpl(3);

    // Fill the window
    double[] initialWindow = { 1.0, 5.0, 3.0 };
    for (int i = 0; i < initialWindow.length; i++) {
        stats.addValue( initialWindow[i] );
    }

    // A fourth value rolls the window and discards 1.0, the min
    stats.addValue( 4.0 );
    double minAfterRoll = stats.getMin();
    assertEquals( "min not expected", 3.0, minAfterRoll, tolerance );

    // A fifth value discards 5.0, the max
    stats.addValue( 1.0 );
    double maxAfterRoll = stats.getMax();
    assertEquals( "max not expected", 4.0, maxAfterRoll, tolerance );
}
/**
 * Verifies the NaN contracts of the statistics: mean, min, max, product,
 * standard deviation, variance and geometric mean are NaN before any
 * value has been added, and the geometric mean is NaN whenever the
 * product of the added values is not positive.
 */
public void testNaNContracts() {
    UnivariateImpl u = new UnivariateImpl();

    // No values added yet: each of these statistics must report NaN
    assertTrue("mean not NaN",Double.isNaN(u.getMean()));
    assertTrue("min not NaN",Double.isNaN(u.getMin()));
    assertTrue("max not NaN",Double.isNaN(u.getMax()));
    assertTrue("product not NaN",Double.isNaN(u.getProduct()));
    assertTrue("std dev not NaN",Double.isNaN(u.getStandardDeviation()));
    assertTrue("var not NaN",Double.isNaN(u.getVariance()));
    assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));

    // A single value: statistics are defined, variance is zero
    u.addValue(1.0);
    assertEquals( "mean not expected", 1.0,
        u.getMean(), Double.MIN_VALUE);
    assertEquals( "variance not expected", 0.0,
        u.getVariance(), Double.MIN_VALUE);
    assertEquals( "geometric mean not expected", 1.0,
        u.getGeometricMean(), Double.MIN_VALUE);

    // Product becomes negative: geometric mean is undefined
    u.addValue(-1.0);
    assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));

    // Product becomes zero: geometric mean is still undefined
    u.addValue(0.0);
    assertTrue("geom mean not NaN",Double.isNaN(u.getGeometricMean()));

    //FIXME: test the remaining NaN contract specs
}
}