Added OneWayAnova methods to TestUtils and updated User Guide

to cover One-way Anova tests.
JIRA: MATH-173


git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@618114 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2008-02-03 22:48:38 +00:00
parent 821bca564f
commit 61fabe8b9f
3 changed files with 147 additions and 19 deletions

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.commons.math.stat.inference; package org.apache.commons.math.stat.inference;
import java.util.Collection;
import org.apache.commons.math.MathException; import org.apache.commons.math.MathException;
import org.apache.commons.math.stat.descriptive.StatisticalSummary; import org.apache.commons.math.stat.descriptive.StatisticalSummary;
@ -45,6 +46,10 @@ public class TestUtils {
private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest = private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
new ChiSquareTestImpl(); new ChiSquareTestImpl();
/**
 * Singleton OneWayAnova instance, initialized to the default
 * implementation ({@link OneWayAnovaImpl}). Can be replaced through
 * {@link #setOneWayAnova(OneWayAnova)}.
 */
private static OneWayAnova oneWayAnova =
new OneWayAnovaImpl();
/** /**
* Set the (singleton) TTest instance. * Set the (singleton) TTest instance.
* *
@ -102,6 +107,27 @@ public class TestUtils {
return unknownDistributionChiSquareTest; return unknownDistributionChiSquareTest;
} }
/**
 * Set the (singleton) OneWayAnova instance.
 * <p>
 * The supplied instance replaces the one held in the static field and is
 * the one subsequently returned by {@link #getOneWayAnova()} and used by
 * the static <code>oneWayAnova*</code> convenience methods of this class.
 * No validation is performed on the argument.</p>
 *
 * @param oneWayAnova the new instance to use
 * @since 1.2
 */
public static void setOneWayAnova(OneWayAnova oneWayAnova) {
TestUtils.oneWayAnova = oneWayAnova;
}
/**
 * Return the (singleton) OneWayAnova instance. Does not create a new
 * instance; the value currently stored in the static field is returned
 * as-is.
 *
 * @return the OneWayAnova instance currently in use
 * @since 1.2
 */
public static OneWayAnova getOneWayAnova() {
return oneWayAnova;
}
/** /**
* @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(double[], double[]) * @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(double[], double[])
*/ */
@ -321,6 +347,8 @@ public class TestUtils {
/** /**
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareDataSetsComparison(long[], long[]) * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareDataSetsComparison(long[], long[])
*
* @since 1.2
*/ */
public static double chiSquareDataSetsComparison(long[] observed1, long[] observed2) public static double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException { throws IllegalArgumentException {
@ -329,6 +357,8 @@ public class TestUtils {
/** /**
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[]) * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
*
* @since 1.2
*/ */
public static double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2) public static double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException, MathException { throws IllegalArgumentException, MathException {
@ -338,12 +368,43 @@ public class TestUtils {
/** /**
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double) * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
*
* @since 1.2
*/ */
public static boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, public static boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
double alpha) double alpha)
throws IllegalArgumentException, MathException { throws IllegalArgumentException, MathException {
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha); return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
} }
/**
 * Computes the One-Way ANOVA F-value for the supplied category data by
 * delegating to the singleton OneWayAnova instance held by this class.
 *
 * @param categoryData collection of datasets, one per category
 *        (e.g. a <code>List</code> of <code>double[]</code> arrays)
 * @return the F-value reported by the underlying OneWayAnova instance
 * @throws IllegalArgumentException propagated from the underlying instance
 * @throws MathException propagated from the underlying instance
 * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaFValue(Collection)
 *
 * @since 1.2
 */
public static double oneWayAnovaFValue(Collection categoryData)
    throws IllegalArgumentException, MathException {
    final double fValue = oneWayAnova.anovaFValue(categoryData);
    return fValue;
}
/**
 * Computes the One-Way ANOVA p-value for the supplied category data by
 * delegating to the singleton OneWayAnova instance held by this class.
 *
 * @param categoryData collection of datasets, one per category
 *        (e.g. a <code>List</code> of <code>double[]</code> arrays)
 * @return the p-value reported by the underlying OneWayAnova instance
 * @throws IllegalArgumentException propagated from the underlying instance
 * @throws MathException propagated from the underlying instance
 * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaPValue(Collection)
 *
 * @since 1.2
 */
public static double oneWayAnovaPValue(Collection categoryData)
    throws IllegalArgumentException, MathException {
    final double pValue = oneWayAnova.anovaPValue(categoryData);
    return pValue;
}
/**
 * Performs a One-Way ANOVA test at the given significance level by
 * delegating to the singleton OneWayAnova instance held by this class.
 *
 * @param categoryData collection of datasets, one per category
 *        (e.g. a <code>List</code> of <code>double[]</code> arrays)
 * @param alpha significance level of the test
 * @return the boolean outcome reported by the underlying OneWayAnova
 *         instance (<code>true</code> means the null hypothesis is rejected)
 * @throws IllegalArgumentException propagated from the underlying instance
 * @throws MathException propagated from the underlying instance
 * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaTest(Collection,double)
 *
 * @since 1.2
 */
public static boolean oneWayAnovaTest(Collection categoryData, double alpha)
    throws IllegalArgumentException, MathException {
    final boolean rejectNull = oneWayAnova.anovaTest(categoryData, alpha);
    return rejectNull;
}
} }

View File

@ -16,6 +16,9 @@
*/ */
package org.apache.commons.math.stat.inference; package org.apache.commons.math.stat.inference;
import java.util.ArrayList;
import java.util.List;
import junit.framework.Test; import junit.framework.Test;
import junit.framework.TestCase; import junit.framework.TestCase;
import junit.framework.TestSuite; import junit.framework.TestSuite;
@ -440,4 +443,26 @@ public class TestUtilsTest extends TestCase {
assertFalse(TestUtils.pairedTTest(sample1, sample3, .001)); assertFalse(TestUtils.pairedTTest(sample1, sample3, .001));
assertTrue(TestUtils.pairedTTest(sample1, sample3, .002)); assertTrue(TestUtils.pairedTTest(sample1, sample3, .002));
} }
// Sample data for the first ANOVA category (4 observations).
private double[] classA =
{93.0, 103.0, 95.0, 101.0};
// Sample data for the second ANOVA category (5 observations).
private double[] classB =
{99.0, 92.0, 102.0, 100.0, 102.0};
// Sample data for the third ANOVA category (5 observations).
private double[] classC =
{110.0, 115.0, 111.0, 117.0, 128.0};
// Collection of category arrays; starts empty and is populated inside the test method.
private List classes = new ArrayList();
// Reference implementation used to produce the expected values for the TestUtils wrappers.
private OneWayAnova oneWayAnova = new OneWayAnovaImpl();
/**
 * Verifies that the static One-Way ANOVA convenience methods in TestUtils
 * return the same F-value, p-value and test outcome as a directly
 * constructed OneWayAnovaImpl on the same category data.
 */
public void testOneWayAnovaUtils() throws Exception {
    // Load the three sample categories into the shared collection.
    final double[][] samples = {classA, classB, classC};
    for (int i = 0; i < samples.length; i++) {
        classes.add(samples[i]);
    }

    final double tolerance = 10E-12;

    // F-value: direct implementation vs. TestUtils wrapper.
    final double expectedF = oneWayAnova.anovaFValue(classes);
    final double actualF = TestUtils.oneWayAnovaFValue(classes);
    assertEquals(expectedF, actualF, tolerance);

    // p-value: direct implementation vs. TestUtils wrapper.
    final double expectedP = oneWayAnova.anovaPValue(classes);
    final double actualP = TestUtils.oneWayAnovaPValue(classes);
    assertEquals(expectedP, actualP, tolerance);

    // Fixed significance level test outcome must agree as well.
    final boolean expectedOutcome = oneWayAnova.anovaTest(classes, 0.01);
    final boolean actualOutcome = TestUtils.oneWayAnovaTest(classes, 0.01);
    assertEquals(expectedOutcome, actualOutcome);
}
} }

View File

@ -29,7 +29,7 @@
<p> <p>
The statistics package provides frameworks and implementations for The statistics package provides frameworks and implementations for
basic Descriptive statistics, frequency distributions, bivariate regression, basic Descriptive statistics, frequency distributions, bivariate regression,
and t- and chi-square test statistics. and t-, chi-square and ANOVA test statistics.
</p> </p>
<p> <p>
<a href="#1.2 Descriptive statistics">Descriptive statistics</a><br></br> <a href="#1.2 Descriptive statistics">Descriptive statistics</a><br></br>
@ -399,30 +399,36 @@ System.out.println(regression.getSlopeStdErr());
<a href="../apidocs/org/apache/commons/math/stat/inference/"> <a href="../apidocs/org/apache/commons/math/stat/inference/">
org.apache.commons.math.stat.inference</a> package provide org.apache.commons.math.stat.inference</a> package provide
<a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm"> <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm">
Student's t</a> and Student's t</a>,
<a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
Chi-Square</a> test statistics as well as Chi-Square</a> and
<a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm">
One-Way ANOVA</a> test statistics as well as
<a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
p-values</a> associated with <code>t-</code> and p-values</a> associated with <code>t-</code>,
<code>Chi-Square</code> tests. The interfaces are <code>Chi-Square</code> and <code>One-Way ANOVA</code> tests. The
interfaces are
<a href="../apidocs/org/apache/commons/math/stat/inference/TTest.html"> <a href="../apidocs/org/apache/commons/math/stat/inference/TTest.html">
TTest</a> and TTest</a>,
<a href="../apidocs/org/apache/commons/math/stat/inference/ChiSquareTest.html"> <a href="../apidocs/org/apache/commons/math/stat/inference/ChiSquareTest.html">
ChiSquareTest</a>, with ChiSquareTest</a>, and
provided implementations <a href="../apidocs/org/apache/commons/math/stat/inference/OneWayAnova.html">
OneWayAnova</a> with provided implementations
<a href="../apidocs/org/apache/commons/math/stat/inference/TTestImpl.html"> <a href="../apidocs/org/apache/commons/math/stat/inference/TTestImpl.html">
TTestImpl</a> and TTestImpl</a>,
<a href="../apidocs/org/apache/commons/math/stat/inference/ChiSquareTestImpl.html"> <a href="../apidocs/org/apache/commons/math/stat/inference/ChiSquareTestImpl.html">
ChiSquareTestImpl</a>. ChiSquareTestImpl</a> and
Abstract and default factories are provided, with configuration <a href="../apidocs/org/apache/commons/math/stat/inference/OneWayAnovaImpl.html">
optional using commons-discovery to specify the concrete factory. The OneWayAnovaImpl</a>, respectively.
The
<a href="../apidocs/org/apache/commons/math/stat/inference/TestUtils.html"> <a href="../apidocs/org/apache/commons/math/stat/inference/TestUtils.html">
TestUtils</a> class provides static methods to get test instances or TestUtils</a> class provides static methods to get test instances or
to compute test statistics directly. The examples below all use the to compute test statistics directly. The examples below all use the
static methods in <code>TestUtils</code> to execute tests. To get static methods in <code>TestUtils</code> to execute tests. To get
test object instances, either use e.g., test object instances, either use e.g.,
<code>TestUtils.getTTest()</code> or use the factory directly, e.g., <code>TestUtils.getTTest()</code> or use the implementation constructors
<code>TestFactory.newInstance().createChiSquareTest()</code>. directly, e.g.,
<code>new TTestImpl()</code>.
</p> </p>
<p> <p>
<strong>Implementation Notes</strong> <strong>Implementation Notes</strong>
@ -448,8 +454,8 @@ System.out.println(regression.getSlopeStdErr());
assumptions of the parametric t-test procedure, as discussed assumptions of the parametric t-test procedure, as discussed
<a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
here</a></li> here</a></li>
<li>p-values returned by both t- and chi-square tests are exact, based <li>p-values returned by t-, chi-square and Anova tests are exact, based
on numerical approximations to the t- and chi-square distributions in the on numerical approximations to the t-, chi-square and F distributions in the
<code>distributions</code> package. </li> <code>distributions</code> package. </li>
<li>p-values returned by t-tests are for two-sided tests and the boolean-valued <li>p-values returned by t-tests are for two-sided tests and the boolean-valued
methods supporting fixed significance level tests assume that the hypotheses methods supporting fixed significance level tests assume that the hypotheses
@ -512,6 +518,7 @@ TestUtils.tTest(mu, observed, alpha);
To test, for example at the 95% level of confidence, use To test, for example at the 95% level of confidence, use
<code>alpha = 0.05</code> <code>alpha = 0.05</code>
</dd> </dd>
<br></br>
<dt><strong>Two-Sample t-tests</strong></dt> <dt><strong>Two-Sample t-tests</strong></dt>
<br></br> <br></br>
<dd><strong>Example 1:</strong> Paired test evaluating <dd><strong>Example 1:</strong> Paired test evaluating
@ -584,7 +591,8 @@ TestUtils.tTest(sample1, sample2, .05);
replace "t" at the beginning of the method name with "homoscedasticT" replace "t" at the beginning of the method name with "homoscedasticT"
</p> </p>
</dd> </dd>
<dt>Computing <code>chi-square</code> test statistics</dt> <br></br>
<dt><strong>Chi-square tests</strong></dt>
<br></br> <br></br>
<dd>To compute a chi-square statistic measuring the agreement between a <dd>To compute a chi-square statistic measuring the agreement between a
<code>long[]</code> array of observed counts and a <code>double[]</code> <code>long[]</code> array of observed counts and a <code>double[]</code>
@ -644,7 +652,41 @@ TestUtils.chiSquareTest(counts, alpha);
The boolean value returned will be <code>true</code> iff the null The boolean value returned will be <code>true</code> iff the null
hypothesis can be rejected with confidence <code>1 - alpha</code>. hypothesis can be rejected with confidence <code>1 - alpha</code>.
</dd> </dd>
</dl> <br></br>
<dt><strong>One-Way Anova tests</strong></dt>
<br></br>
<dd>To conduct a One-Way Analysis of Variance (ANOVA) to evaluate the
null hypothesis that the means of a collection of univariate datasets
are the same, start by loading the datasets into a collection, e.g.
<source>
double[] classA =
{93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0 };
double[] classB =
{99.0, 92.0, 102.0, 100.0, 102.0, 89.0 };
double[] classC =
{110.0, 115.0, 111.0, 117.0, 128.0, 117.0 };
List classes = new ArrayList();
classes.add(classA);
classes.add(classB);
classes.add(classC);
</source>
Then you can compute ANOVA F- or p-values associated with the
null hypothesis that the class means are all the same
using a <code>OneWayAnova</code> instance or <code>TestUtils</code>
methods:
<source>
double fStatistic = TestUtils.oneWayAnovaFValue(classes); // F-value
double pValue = TestUtils.oneWayAnovaPValue(classes); // P-value
</source>
To perform a One-Way Anova test with significance level set at 0.01
(so the test will, assuming assumptions are met, reject the null
hypothesis incorrectly only about one in 100 times), use
<source>
TestUtils.oneWayAnovaTest(classes, 0.01); // returns a boolean
// true means reject null hypothesis
</source>
</dd>
</dl>
</p> </p>
</subsection> </subsection>
</section> </section>