Added check and rescaling of expected counts to sum to sum of observed
counts if necessary in ChiSquare test. JIRA: MATH-175 Reported and patched by Carl Anderson. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@610274 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fb4949bd07
commit
a3dc59a94d
|
@ -50,6 +50,11 @@ public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
|
|||
setDistribution(x);
|
||||
}
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p><strong>Note: </strong>This implementation rescales the
|
||||
* <code>expected</code> array if necessary to ensure that the sums of the
|
||||
* expected and observed counts are equal.</p>
|
||||
*
|
||||
* @param observed array of observed frequency counts
|
||||
* @param expected array of expected frequency counts
|
||||
* @return chi-square test statistic
|
||||
|
@ -58,8 +63,6 @@ public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
|
|||
*/
|
||||
public double chiSquare(double[] expected, long[] observed)
|
||||
throws IllegalArgumentException {
|
||||
double sumSq = 0.0d;
|
||||
double dev = 0.0d;
|
||||
if ((expected.length < 2) || (expected.length != observed.length)) {
|
||||
throw new IllegalArgumentException(
|
||||
"observed, expected array lengths incorrect");
|
||||
|
@ -68,14 +71,38 @@ public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
|
|||
throw new IllegalArgumentException(
|
||||
"observed counts must be non-negative and expected counts must be postive");
|
||||
}
|
||||
double sumExpected = 0d;
|
||||
double sumObserved = 0d;
|
||||
for (int i = 0; i < observed.length; i++) {
|
||||
sumExpected += expected[i];
|
||||
sumObserved += observed[i];
|
||||
}
|
||||
double ratio = 1.0d;
|
||||
boolean rescale = false;
|
||||
if (Math.abs(sumExpected - sumObserved) > 10E-6) {
|
||||
ratio = sumObserved / sumExpected;
|
||||
rescale = true;
|
||||
}
|
||||
double sumSq = 0.0d;
|
||||
double dev = 0.0d;
|
||||
for (int i = 0; i < observed.length; i++) {
|
||||
if (rescale) {
|
||||
dev = ((double) observed[i] - ratio * expected[i]);
|
||||
sumSq += dev * dev / (ratio * expected[i]);
|
||||
} else {
|
||||
dev = ((double) observed[i] - expected[i]);
|
||||
sumSq += dev * dev / expected[i];
|
||||
}
|
||||
}
|
||||
return sumSq;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p><strong>Note: </strong>This implementation rescales the
|
||||
* <code>expected</code> array if necessary to ensure that the sums of the
|
||||
* expected and observed counts are equal.</p>
|
||||
*
|
||||
* @param observed array of observed frequency counts
|
||||
* @param expected array of expected frequency counts
|
||||
* @return p-value
|
||||
|
@ -90,6 +117,11 @@ public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
|
|||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p><strong>Note: </strong>This implementation rescales the
|
||||
* <code>expected</code> array if necessary to ensure that the sums of the
|
||||
* expected and observed counts are equal.</p>
|
||||
*
|
||||
* @param observed array of observed frequency counts
|
||||
* @param expected array of expected frequency counts
|
||||
* @param alpha significance level of the test
|
||||
|
|
|
@ -49,8 +49,9 @@ verifyTable <- function(counts, expectedP, expectedStat, tol, desc) {
|
|||
|
||||
verifyHomogeneity <- function(obs, exp, expectedP, expectedStat,
|
||||
tol, desc) {
|
||||
chi <- sum((obs - exp)^2/exp)
|
||||
p <- 1 - pchisq(sum((obs - exp)^2/exp), length(obs) - 1)
|
||||
results <- chisq.test(obs,p=exp,rescale.p=TRUE)
|
||||
chi <- results$statistic
|
||||
p <- results$p.value
|
||||
if (assertEquals(expectedP, p, tol, "p-value")) {
|
||||
displayPadded(c(desc, " p-value test"), SUCCEEDED, WIDTH)
|
||||
} else {
|
||||
|
@ -73,14 +74,14 @@ verifyHomogeneity(observed, expected, 0.904837418036, 0.2, tol,
|
|||
|
||||
observed <- c(500, 623, 72, 70, 31)
|
||||
expected <- c(485, 541, 82, 61, 37)
|
||||
verifyHomogeneity(observed, expected, 0.002512096, 16.4131070362, tol,
|
||||
"testChiSquare2")
|
||||
verifyHomogeneity(observed, expected, 0.06051952647453607, 9.023307936427388,
|
||||
tol, "testChiSquare2")
|
||||
|
||||
observed <- c(2372383, 584222, 257170, 17750155, 7903832, 489265,
|
||||
209628, 393899)
|
||||
expected <- c(3389119.5, 649136.6, 285745.4, 25357364.76, 11291189.78,
|
||||
543628.0, 232921.0, 437665.75)
|
||||
verifyHomogeneity(observed, expected, 0, 3624883.342907764, tol,
|
||||
verifyHomogeneity(observed, expected, 0, 114875.90421929007, tol,
|
||||
"testChiSquareLargeTestStatistic")
|
||||
|
||||
counts <- matrix(c(40, 22, 43, 91, 21, 28, 60, 10, 22), nc = 3);
|
||||
|
|
|
@ -57,10 +57,10 @@ public class ChiSquareTestTest extends TestCase {
|
|||
|
||||
long[] observed1 = { 500, 623, 72, 70, 31 };
|
||||
double[] expected1 = { 485, 541, 82, 61, 37 };
|
||||
assertEquals( "chi-square test statistic", 16.4131070362, testStatistic.chiSquare(expected1, observed1), 1E-10);
|
||||
assertEquals("chi-square p-value", 0.002512096, testStatistic.chiSquareTest(expected1, observed1), 1E-9);
|
||||
assertTrue("chi-square test reject", testStatistic.chiSquareTest(expected1, observed1, 0.003));
|
||||
assertTrue("chi-square test accept", !testStatistic.chiSquareTest(expected1, observed1, 0.002));
|
||||
assertEquals( "chi-square test statistic", 9.023307936427388, testStatistic.chiSquare(expected1, observed1), 1E-10);
|
||||
assertEquals("chi-square p-value", 0.06051952647453607, testStatistic.chiSquareTest(expected1, observed1), 1E-9);
|
||||
assertTrue("chi-square test reject", testStatistic.chiSquareTest(expected1, observed1, 0.08));
|
||||
assertTrue("chi-square test accept", !testStatistic.chiSquareTest(expected1, observed1, 0.05));
|
||||
|
||||
try {
|
||||
testStatistic.chiSquareTest(expected1, observed1, 95);
|
||||
|
@ -181,7 +181,7 @@ public class ChiSquareTestTest extends TestCase {
|
|||
double cst = csti.chiSquareTest(exp, obs);
|
||||
assertEquals("chi-square p-value", 0.0, cst, 1E-3);
|
||||
assertEquals( "chi-square test statistic",
|
||||
3624883.342907764, testStatistic.chiSquare(exp, obs), 1E-9);
|
||||
114875.90421929007, testStatistic.chiSquare(exp, obs), 1E-9);
|
||||
}
|
||||
|
||||
/** Contingency table containing zeros - PR # 32531 */
|
||||
|
|
|
@ -55,10 +55,10 @@ public class TestUtilsTest extends TestCase {
|
|||
|
||||
long[] observed1 = { 500, 623, 72, 70, 31 };
|
||||
double[] expected1 = { 485, 541, 82, 61, 37 };
|
||||
assertEquals( "chi-square test statistic", 16.4131070362, TestUtils.chiSquare(expected1, observed1), 1E-10);
|
||||
assertEquals("chi-square p-value", 0.002512096, TestUtils.chiSquareTest(expected1, observed1), 1E-9);
|
||||
assertTrue("chi-square test reject", TestUtils.chiSquareTest(expected1, observed1, 0.003));
|
||||
assertTrue("chi-square test accept", !TestUtils.chiSquareTest(expected1, observed1, 0.002));
|
||||
assertEquals( "chi-square test statistic", 9.023307936427388, TestUtils.chiSquare(expected1, observed1), 1E-10);
|
||||
assertEquals("chi-square p-value", 0.06051952647453607, TestUtils.chiSquareTest(expected1, observed1), 1E-9);
|
||||
assertTrue("chi-square test reject", TestUtils.chiSquareTest(expected1, observed1, 0.07));
|
||||
assertTrue("chi-square test accept", !TestUtils.chiSquareTest(expected1, observed1, 0.05));
|
||||
|
||||
try {
|
||||
TestUtils.chiSquareTest(expected1, observed1, 95);
|
||||
|
@ -179,7 +179,7 @@ public class TestUtilsTest extends TestCase {
|
|||
double cst = csti.chiSquareTest(exp, obs);
|
||||
assertEquals("chi-square p-value", 0.0, cst, 1E-3);
|
||||
assertEquals( "chi-square test statistic",
|
||||
3624883.342907764, TestUtils.chiSquare(exp, obs), 1E-9);
|
||||
114875.90421929007, TestUtils.chiSquare(exp, obs), 1E-9);
|
||||
}
|
||||
|
||||
/** Contingency table containing zeros - PR # 32531 */
|
||||
|
|
|
@ -117,6 +117,10 @@ Commons Math Release Notes</title>
|
|||
by exploiting the fact that this method has access to the full
|
||||
array of data values.
|
||||
</action>
|
||||
<action dev="psteitz" type="fix" issue="MATH-175" due-to="Carl Anderson">
|
||||
Added check and rescaling of expected counts to sum to sum of observed
|
||||
counts if necessary in ChiSquare test.
|
||||
</action>
|
||||
</release>
|
||||
<release version="1.1" date="2005-12-17"
|
||||
description="This is a maintenance release containing bug fixes and enhancements.
|
||||
|
|
Loading…
Reference in New Issue