[MATH-160] introduced a new UnknownDistributionChiSquareTest interface
to hold the new methods without creating binary incompatibilities with commons-math 1.1. The factories have been deprecated and do not handle this new interface. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@574049 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d762008115
commit
404971c56a
|
@ -20,7 +20,9 @@ import org.apache.commons.math.MathException;
|
|||
|
||||
/**
|
||||
* An interface for Chi-Square tests.
|
||||
*
|
||||
* <p>This interface handles only known distributions. If the distribution is
|
||||
* unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
|
||||
* UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
|
||||
* @version $Revision$ $Date$
|
||||
*/
|
||||
public interface ChiSquareTest {
|
||||
|
@ -28,7 +30,7 @@ public interface ChiSquareTest {
|
|||
/**
|
||||
* Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
|
||||
* Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
|
||||
* freqeuncy counts.
|
||||
* frequency counts.
|
||||
* <p>
|
||||
* This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
|
||||
* the observed counts follow the expected distribution.
|
||||
|
@ -212,117 +214,4 @@ public interface ChiSquareTest {
|
|||
boolean chiSquareTest(long[][] counts, double alpha)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
* <p>Computes a
|
||||
* <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
|
||||
* Chi-Square two sample test statistic</a> comparing bin frequency counts
|
||||
* in <code>observed1</code> and <code>observed2</code>. The
|
||||
* sums of frequency counts in the two samples are not required to be the
|
||||
* same. The formula used to compute the test statistic is</p>
|
||||
* <code>
|
||||
* ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
|
||||
* </code> where
|
||||
* <br/><code>K = &sqrt;[&sum(observed2 / ∑(observed1)]</code>
|
||||
* </p>
|
||||
* <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
|
||||
* both observed counts follow the same distribution.
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must have the same length and
|
||||
* their common length must be at least 2.
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @return chiSquare statistic
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
*/
|
||||
double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException;
|
||||
|
||||
/**
|
||||
* <p>Returns the <i>observed significance level</i>, or <a href=
|
||||
* "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a Chi-Square two sample test comparing
|
||||
* bin frequency counts in <code>observed1</code> and
|
||||
* <code>observed2</code>.
|
||||
* </p>
|
||||
* <p>The number returned is the smallest significance level at which one
|
||||
* can reject the null hypothesis that the observed counts conform to the
|
||||
* same distribution.
|
||||
* </p>
|
||||
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
|
||||
* on the formula used to compute the test statistic. The degrees of
|
||||
* of freedom used to perform the test is one less than the common length
|
||||
* of the input observed count arrays.
|
||||
* </p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
|
||||
* have the same length and
|
||||
* their common length must be at least 2.
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
* <p>Performs a Chi-Square two sample test comparing two binned data
|
||||
* sets. The test evaluates the null hypothesis that the two lists of
|
||||
* observed counts conform to the same frequency distribution, with
|
||||
* significance level <code>alpha</code>. Returns true iff the null
|
||||
* hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
|
||||
* </p>
|
||||
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
|
||||
* details on the formula used to compute the Chisquare statistic used
|
||||
* in the test. The degrees of of freedom used to perform the test is
|
||||
* one less than the common length of the input observed count arrays.
|
||||
* </p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
|
||||
* have the same length and their common length must be at least 2.
|
||||
* </li>
|
||||
* <li> <code> 0 < alpha < 0.5 </code>
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @param alpha significance level of the test
|
||||
* @return true iff null hypothesis can be rejected with confidence
|
||||
* 1 - alpha
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
}
|
||||
|
|
|
@ -22,11 +22,12 @@ import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
|
|||
import org.apache.commons.math.distribution.DistributionFactory;
|
||||
|
||||
/**
|
||||
* Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface.
|
||||
* Implements Chi-Square test statistics defined in the
|
||||
* {@link UnknownDistributionChiSquareTest} interface.
|
||||
*
|
||||
* @version $Revision$ $Date$
|
||||
*/
|
||||
public class ChiSquareTestImpl implements ChiSquareTest {
|
||||
public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
|
||||
|
||||
/** Distribution used to compute inference statistics. */
|
||||
private ChiSquaredDistribution distribution;
|
||||
|
|
|
@ -22,6 +22,8 @@ import org.apache.commons.discovery.tools.DiscoverClass;
|
|||
*
|
||||
* @since 1.1
|
||||
* @version $Revision$ $Date$
|
||||
* @deprecated as of 1.2, pluggability of test instances is now provided through
|
||||
* constructors and setters.
|
||||
*/
|
||||
public abstract class TestFactory {
|
||||
/**
|
||||
|
@ -57,9 +59,9 @@ public abstract class TestFactory {
|
|||
public abstract TTest createTTest();
|
||||
|
||||
/**
|
||||
* Create a ChiSquareTest instance.
|
||||
* Create an UnknownDistributionChiSquareTest instance.
|
||||
*
|
||||
* @return a new ChiSquareTest instance
|
||||
* @return a new UnknownDistributionChiSquareTest instance
|
||||
*/
|
||||
public abstract ChiSquareTest createChiSquareTest();
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.commons.math.stat.inference;
|
|||
* A concrete inference test factory. This is the default factory used by
|
||||
* Commons-Math.
|
||||
*
|
||||
* @deprecated as of 1.2, pluggability of test instances is now provided through
|
||||
* constructors and setters.
|
||||
* @since 1.1
|
||||
* @version $Revision$ $Date$
|
||||
*/
|
||||
|
@ -42,9 +44,9 @@ public class TestFactoryImpl extends TestFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a ChiSquareTest instance.
|
||||
* Create an UnknownDistributionChiSquareTest instance.
|
||||
*
|
||||
* @return a new ChiSquareTest instance
|
||||
* @return a new UnknownDistributionChiSquareTest instance
|
||||
*/
|
||||
public ChiSquareTest createChiSquareTest() {
|
||||
return new ChiSquareTestImpl();
|
||||
|
|
|
@ -34,12 +34,26 @@ public class TestUtils {
|
|||
super();
|
||||
}
|
||||
|
||||
/** Singleton TTest instance initialized using configured factory */
|
||||
private static TTest tTest = TestFactory.newInstance().createTTest();
|
||||
/** Singleton TTest instance using default implementation. */
|
||||
private static TTest tTest = new TTestImpl();
|
||||
|
||||
/** Singleton ChiSquareTest instance initialized using configured factory */
|
||||
/** Singleton ChiSquareTest instance using default implementation. */
|
||||
private static ChiSquareTest chiSquareTest =
|
||||
TestFactory.newInstance().createChiSquareTest();
|
||||
new ChiSquareTestImpl();
|
||||
|
||||
/** Singleton UnknownDistributionChiSquareTest instance using default implementation. */
|
||||
private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
|
||||
new ChiSquareTestImpl();
|
||||
|
||||
/**
|
||||
* Set the (singleton) TTest instance.
|
||||
*
|
||||
* @param tTest the new instance to use
|
||||
* @since 1.2
|
||||
*/
|
||||
public static void setChiSquareTest(TTest tTest) {
|
||||
TestUtils.tTest = tTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a (singleton) TTest instance. Does not create a new instance.
|
||||
|
@ -50,6 +64,16 @@ public class TestUtils {
|
|||
return tTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the (singleton) ChiSquareTest instance.
|
||||
*
|
||||
* @param chiSquareTest the new instance to use
|
||||
* @since 1.2
|
||||
*/
|
||||
public static void setChiSquareTest(ChiSquareTest chiSquareTest) {
|
||||
TestUtils.chiSquareTest = chiSquareTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a (singleton) ChiSquareTest instance. Does not create a new instance.
|
||||
*
|
||||
|
@ -59,6 +83,25 @@ public class TestUtils {
|
|||
return chiSquareTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the (singleton) UnknownDistributionChiSquareTest instance.
|
||||
*
|
||||
* @param unknownDistributionChiSquareTest the new instance to use
|
||||
* @since 1.2
|
||||
*/
|
||||
public static void setUnknownDistributionChiSquareTest(UnknownDistributionChiSquareTest unknownDistributionChiSquareTest) {
|
||||
TestUtils.unknownDistributionChiSquareTest = unknownDistributionChiSquareTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a (singleton) UnknownDistributionChiSquareTest instance. Does not create a new instance.
|
||||
*
|
||||
* @return an UnknownDistributionChiSquareTest instance
|
||||
*/
|
||||
public static UnknownDistributionChiSquareTest getUnknownDistributionChiSquareTest() {
|
||||
return unknownDistributionChiSquareTest;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(double[], double[])
|
||||
*/
|
||||
|
@ -277,29 +320,29 @@ public class TestUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareDataSetsComparison(long[], long[])
|
||||
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareDataSetsComparison(long[], long[])
|
||||
*/
|
||||
public static double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException {
|
||||
return chiSquareTest.chiSquareDataSetsComparison(observed1, observed2);
|
||||
return unknownDistributionChiSquareTest.chiSquareDataSetsComparison(observed1, observed2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
|
||||
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
|
||||
*/
|
||||
public static double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException, MathException {
|
||||
return chiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2);
|
||||
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
|
||||
* @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
|
||||
*/
|
||||
public static boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
|
||||
double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
return chiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
|
||||
return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.inference;
|
||||
|
||||
import org.apache.commons.math.MathException;
|
||||
|
||||
/**
|
||||
* An interface for Chi-Square tests for unknown distributions.
|
||||
* <p>Two samples tests are used when the distribution is unknown <i>a priori</i>
|
||||
* but provided by one sample. We compare the second sample against the first.</p>
|
||||
*
|
||||
* @version $Revision: 553603 $ $Date: 2007-07-05 20:34:45 +0200 (jeu, 05 jui 2007) $
|
||||
*/
|
||||
public interface UnknownDistributionChiSquareTest extends ChiSquareTest {
|
||||
|
||||
/**
|
||||
* <p>Computes a
|
||||
* <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
|
||||
* Chi-Square two sample test statistic</a> comparing bin frequency counts
|
||||
* in <code>observed1</code> and <code>observed2</code>. The
|
||||
* sums of frequency counts in the two samples are not required to be the
|
||||
* same. The formula used to compute the test statistic is</p>
|
||||
* <code>
|
||||
* ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
|
||||
* </code> where
|
||||
* <br/><code>K = √[∑(observed2) / ∑(observed1)]</code>
|
||||
* </p>
|
||||
* <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
|
||||
* both observed counts follow the same distribution.
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must have the same length and
|
||||
* their common length must be at least 2.
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @return chiSquare statistic
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
*/
|
||||
double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException;
|
||||
|
||||
/**
|
||||
* <p>Returns the <i>observed significance level</i>, or <a href=
|
||||
* "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a Chi-Square two sample test comparing
|
||||
* bin frequency counts in <code>observed1</code> and
|
||||
* <code>observed2</code>.
|
||||
* </p>
|
||||
* <p>The number returned is the smallest significance level at which one
|
||||
* can reject the null hypothesis that the observed counts conform to the
|
||||
* same distribution.
|
||||
* </p>
|
||||
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
|
||||
* on the formula used to compute the test statistic. The degrees of
|
||||
* freedom used to perform the test is one less than the common length
|
||||
* of the input observed count arrays.
|
||||
* </p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
|
||||
* have the same length and
|
||||
* their common length must be at least 2.
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
* <p>Performs a Chi-Square two sample test comparing two binned data
|
||||
* sets. The test evaluates the null hypothesis that the two lists of
|
||||
* observed counts conform to the same frequency distribution, with
|
||||
* significance level <code>alpha</code>. Returns true iff the null
|
||||
* hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
|
||||
* </p>
|
||||
* <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
|
||||
* details on the formula used to compute the Chi-Square statistic used
|
||||
* in the test. The degrees of freedom used to perform the test is
|
||||
* one less than the common length of the input observed count arrays.
|
||||
* </p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>Observed counts must be non-negative.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific bin must not both be zero.
|
||||
* </li>
|
||||
* <li>Observed counts for a specific sample must not all be 0.
|
||||
* </li>
|
||||
* <li>The arrays <code>observed1</code> and <code>observed2</code> must
|
||||
* have the same length and their common length must be at least 2.
|
||||
* </li>
|
||||
* <li> <code> 0 < alpha < 0.5 </code>
|
||||
* </li></ul><p>
|
||||
* If any of the preconditions are not met, an
|
||||
* <code>IllegalArgumentException</code> is thrown.
|
||||
*
|
||||
* @param observed1 array of observed frequency counts of the first data set
|
||||
* @param observed2 array of observed frequency counts of the second data set
|
||||
* @param alpha significance level of the test
|
||||
* @return true iff null hypothesis can be rejected with confidence
|
||||
* 1 - alpha
|
||||
* @throws IllegalArgumentException if preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
}
|
|
@ -32,7 +32,7 @@ public class ChiSquareFactoryTest extends ChiSquareTestTest {
|
|||
|
||||
public void setUp() {
|
||||
super.setUp();
|
||||
testStatistic = TestUtils.getChiSquareTest();
|
||||
testStatistic = TestUtils.getUnknownDistributionChiSquareTest();
|
||||
}
|
||||
|
||||
public static Test suite() {
|
||||
|
|
|
@ -28,7 +28,7 @@ import junit.framework.TestSuite;
|
|||
|
||||
public class ChiSquareTestTest extends TestCase {
|
||||
|
||||
protected ChiSquareTest testStatistic = new ChiSquareTestImpl();
|
||||
protected UnknownDistributionChiSquareTest testStatistic = new ChiSquareTestImpl();
|
||||
|
||||
public ChiSquareTestTest(String name) {
|
||||
super(name);
|
||||
|
|
Loading…
Reference in New Issue