[MATH-160] introduced a new UnknownDistributionChiSquareTest interface

to hold the new methods without creating binary incompatibilities with
commons-math 1.1
The factories have been deprecated and do not handle this new interface

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@574049 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Luc Maisonobe 2007-09-09 18:46:38 +00:00
parent d762008115
commit 404971c56a
8 changed files with 214 additions and 134 deletions

View File

@ -20,7 +20,9 @@ import org.apache.commons.math.MathException;
/**
* An interface for Chi-Square tests.
*
* <p>This interface handles only known distributions. If the distribution is
* unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
* UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
* @version $Revision$ $Date$
*/
public interface ChiSquareTest {
@ -28,7 +30,7 @@ public interface ChiSquareTest {
/**
* Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
* Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
* freqeuncy counts.
* frequency counts.
* <p>
* This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
* the observed counts follow the expected distribution.
@ -212,117 +214,4 @@ public interface ChiSquareTest {
boolean chiSquareTest(long[][] counts, double alpha)
throws IllegalArgumentException, MathException;
/**
* <p>Computes a
* <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
* Chi-Square two sample test statistic</a> comparing bin frequency counts
* in <code>observed1</code> and <code>observed2</code>. The
* sums of frequency counts in the two samples are not required to be the
* same. The formula used to compute the test statistic is</p>
* <code>
* &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
* </code> where
* <br/><code>K = &sqrt;[&sum(observed2 / &sum;(observed1)]</code>
* </p>
* <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
* both observed counts follow the same distribution.
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must have the same length and
* their common length must be at least 2.
* </li></ul><p>
* If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @return chiSquare statistic
* @throws IllegalArgumentException if preconditions are not met
*/
double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException;
/**
* <p>Returns the <i>observed significance level</i>, or <a href=
* "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a Chi-Square two sample test comparing
* bin frequency counts in <code>observed1</code> and
* <code>observed2</code>.
* </p>
* <p>The number returned is the smallest significance level at which one
* can reject the null hypothesis that the observed counts conform to the
* same distribution.
* </p>
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
* on the formula used to compute the test statistic. The degrees of
* of freedom used to perform the test is one less than the common length
* of the input observed count arrays.
* </p>
* <strong>Preconditions</strong>: <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
* have the same length and
* their common length must be at least 2.
* </li></ul><p>
* If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @return p-value
* @throws IllegalArgumentException if preconditions are not met
* @throws MathException if an error occurs computing the p-value
*/
double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException, MathException;
/**
* <p>Performs a Chi-Square two sample test comparing two binned data
* sets. The test evaluates the null hypothesis that the two lists of
* observed counts conform to the same frequency distribution, with
* significance level <code>alpha</code>. Returns true iff the null
* hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
* </p>
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
* details on the formula used to compute the Chisquare statistic used
* in the test. The degrees of of freedom used to perform the test is
* one less than the common length of the input observed count arrays.
* </p>
* <strong>Preconditions</strong>: <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
* have the same length and their common length must be at least 2.
* </li>
* <li> <code> 0 < alpha < 0.5 </code>
* </li></ul><p>
* If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @param alpha significance level of the test
* @return true iff null hypothesis can be rejected with confidence
* 1 - alpha
* @throws IllegalArgumentException if preconditions are not met
* @throws MathException if an error occurs performing the test
*/
boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)
throws IllegalArgumentException, MathException;
}

View File

@ -22,11 +22,12 @@ import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
import org.apache.commons.math.distribution.DistributionFactory;
/**
* Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface.
* Implements Chi-Square test statistics defined in the
* {@link UnknownDistributionChiSquareTest} interface.
*
* @version $Revision$ $Date$
*/
public class ChiSquareTestImpl implements ChiSquareTest {
public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
/** Distribution used to compute inference statistics. */
private ChiSquaredDistribution distribution;

View File

@ -22,6 +22,8 @@ import org.apache.commons.discovery.tools.DiscoverClass;
*
* @since 1.1
* @version $Revision$ $Date$
* @deprecated as of 1.2, pluggability of test instances is now provided through
* constructors and setters.
*/
public abstract class TestFactory {
/**
@ -57,9 +59,9 @@ public abstract class TestFactory {
public abstract TTest createTTest();
/**
* Create a ChiSquareTest instance.
* Create an UnknownDistributionChiSquareTest instance.
*
* @return a new ChiSquareTest instance
* @return a new UnknownDistributionChiSquareTest instance
*/
public abstract ChiSquareTest createChiSquareTest();
}

View File

@ -20,6 +20,8 @@ package org.apache.commons.math.stat.inference;
* A concrete inference test factory. This is the default factory used by
* Commons-Math.
*
* @deprecated as of 1.2, pluggability of test instances is now provided through
* constructors and setters.
* @since 1.1
* @version $Revision$ $Date$
*/
@ -42,11 +44,11 @@ public class TestFactoryImpl extends TestFactory {
}
/**
* Create a ChiSquareTest instance.
* Create an UnknownDistributionChiSquareTest instance.
*
* @return a new ChiSquareTest instance
* @return a new UnknownDistributionChiSquareTest instance
*/
public ChiSquareTest createChiSquareTest() {
public ChiSquareTest createChiSquareTest() {
return new ChiSquareTestImpl();
}

View File

@ -34,12 +34,26 @@ public class TestUtils {
super();
}
/** Singleton TTest instance initialized using configured factory */
private static TTest tTest = TestFactory.newInstance().createTTest();
/** Singleton TTest instance using default implementation. */
private static TTest tTest = new TTestImpl();
/** Singleton ChiSquareTest instance initialized using configured factory */
/** Singleton ChiSquareTest instance using default implementation. */
private static ChiSquareTest chiSquareTest =
TestFactory.newInstance().createChiSquareTest();
new ChiSquareTestImpl();
/** Singleton UnknownDistributionChiSquareTest instance using default implementation. */
private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
new ChiSquareTestImpl();
/**
 * Set the (singleton) TTest instance.
 *
 * @param tTest the new instance to use
 * @since 1.2
 */
public static void setTTest(TTest tTest) {
    TestUtils.tTest = tTest;
}

/**
 * Set the (singleton) TTest instance.
 * <p>Despite its name, this overload replaces the TTest singleton, not the
 * ChiSquareTest one. It is kept only so that existing callers keep compiling
 * and simply forwards to {@link #setTTest(TTest)}.</p>
 *
 * @param tTest the new instance to use
 * @since 1.2
 * @deprecated misnamed; use {@link #setTTest(TTest)} instead
 */
public static void setChiSquareTest(TTest tTest) {
    setTTest(tTest);
}
/**
* Return a (singleton) TTest instance. Does not create a new instance.
@ -50,6 +64,16 @@ public class TestUtils {
return tTest;
}
/**
* Set the (singleton) ChiSquareTest instance.
* <p>The reference is stored as given in the static <code>chiSquareTest</code>
* field of this class; no validation is performed on it.</p>
*
* @param chiSquareTest the new instance to use
* @since 1.2
*/
public static void setChiSquareTest(ChiSquareTest chiSquareTest) {
// Qualify with the class name: the parameter shadows the static field.
TestUtils.chiSquareTest = chiSquareTest;
}
/**
* Return a (singleton) ChiSquareTest instance. Does not create a new instance.
*
@ -59,6 +83,25 @@ public class TestUtils {
return chiSquareTest;
}
/**
* Set the (singleton) UnknownDistributionChiSquareTest instance.
* <p>The reference is stored as given in the static
* <code>unknownDistributionChiSquareTest</code> field of this class; no
* validation is performed on it.</p>
*
* @param unknownDistributionChiSquareTest the new instance to use
* @since 1.2
*/
public static void setUnknownDistributionChiSquareTest(UnknownDistributionChiSquareTest unknownDistributionChiSquareTest) {
// Qualify with the class name: the parameter shadows the static field.
TestUtils.unknownDistributionChiSquareTest = unknownDistributionChiSquareTest;
}
/**
 * Give access to the shared UnknownDistributionChiSquareTest instance held
 * by this class. No new instance is created by this call.
 *
 * @return the current UnknownDistributionChiSquareTest instance
 */
public static UnknownDistributionChiSquareTest getUnknownDistributionChiSquareTest() {
    return TestUtils.unknownDistributionChiSquareTest;
}
/**
* @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(double[], double[])
*/
@ -277,29 +320,29 @@ public class TestUtils {
}
/**
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareDataSetsComparison(long[], long[])
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareDataSetsComparison(long[], long[])
*/
public static double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException {
return chiSquareTest.chiSquareDataSetsComparison(observed1, observed2);
return unknownDistributionChiSquareTest.chiSquareDataSetsComparison(observed1, observed2);
}
/**
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
*/
public static double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException, MathException {
return chiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2);
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2);
}
/**
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
*/
public static boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
double alpha)
throws IllegalArgumentException, MathException {
return chiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
}

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.inference;
import org.apache.commons.math.MathException;
/**
* An interface for Chi-Square tests for unknown distributions.
* <p>Two-sample tests are used when the distribution is unknown <i>a priori</i>
* but provided by one sample. The second sample is compared against the first.</p>
* <p>This interface extends {@link ChiSquareTest} with the two-sample
* (data sets comparison) methods declared below.</p>
*
* @version $Revision: 553603 $ $Date: 2007-07-05 20:34:45 +0200 (jeu, 05 jui 2007) $
*/
public interface UnknownDistributionChiSquareTest extends ChiSquareTest {
/**
* <p>Computes a
* <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
* Chi-Square two sample test statistic</a> comparing bin frequency counts
* in <code>observed1</code> and <code>observed2</code>. The
* sums of frequency counts in the two samples are not required to be the
* same. The formula used to compute the test statistic is</p>
* <p><code>
* &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
* </code> where
* <br/><code>K = &radic;[&sum;(observed2) / &sum;(observed1)]</code>
* </p>
* <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
* both observed counts follow the same distribution.</p>
* <p><strong>Preconditions</strong>:</p> <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must have the same length and
* their common length must be at least 2.
* </li></ul>
* <p>If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.</p>
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @return chiSquare statistic
* @throws IllegalArgumentException if preconditions are not met
*/
double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException;
/**
* <p>Returns the <i>observed significance level</i>, or <a href=
* "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a Chi-Square two sample test comparing
* bin frequency counts in <code>observed1</code> and
* <code>observed2</code>.
* </p>
* <p>The number returned is the smallest significance level at which one
* can reject the null hypothesis that the observed counts conform to the
* same distribution.
* </p>
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
* on the formula used to compute the test statistic. The number of degrees
* of freedom used to perform the test is one less than the common length
* of the input observed count arrays.
* </p>
* <p><strong>Preconditions</strong>:</p> <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
* have the same length and
* their common length must be at least 2.
* </li></ul>
* <p>If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.</p>
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @return p-value
* @throws IllegalArgumentException if preconditions are not met
* @throws MathException if an error occurs computing the p-value
*/
double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
throws IllegalArgumentException, MathException;
/**
* <p>Performs a Chi-Square two sample test comparing two binned data
* sets. The test evaluates the null hypothesis that the two lists of
* observed counts conform to the same frequency distribution, with
* significance level <code>alpha</code>. Returns true iff the null
* hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
* </p>
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
* details on the formula used to compute the Chi-Square statistic used
* in the test. The number of degrees of freedom used to perform the test is
* one less than the common length of the input observed count arrays.
* </p>
* <p><strong>Preconditions</strong>:</p> <ul>
* <li>Observed counts must be non-negative.
* </li>
* <li>Observed counts for a specific bin must not both be zero.
* </li>
* <li>Observed counts for a specific sample must not all be 0.
* </li>
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
* have the same length and their common length must be at least 2.
* </li>
* <li> <code>0 &lt; alpha &lt; 0.5</code>
* </li></ul>
* <p>If any of the preconditions are not met, an
* <code>IllegalArgumentException</code> is thrown.</p>
*
* @param observed1 array of observed frequency counts of the first data set
* @param observed2 array of observed frequency counts of the second data set
* @param alpha significance level of the test
* @return true iff null hypothesis can be rejected with confidence
* 1 - alpha
* @throws IllegalArgumentException if preconditions are not met
* @throws MathException if an error occurs performing the test
*/
boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)
throws IllegalArgumentException, MathException;
}

View File

@ -32,7 +32,7 @@ public class ChiSquareFactoryTest extends ChiSquareTestTest {
public void setUp() {
super.setUp();
testStatistic = TestUtils.getChiSquareTest();
testStatistic = TestUtils.getUnknownDistributionChiSquareTest();
}
public static Test suite() {

View File

@ -28,7 +28,7 @@ import junit.framework.TestSuite;
public class ChiSquareTestTest extends TestCase {
protected ChiSquareTest testStatistic = new ChiSquareTestImpl();
protected UnknownDistributionChiSquareTest testStatistic = new ChiSquareTestImpl();
public ChiSquareTestTest(String name) {
super(name);