Added support for equal variances tests.
git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141271 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7f04479e5c
commit
73d8935012
|
@ -21,7 +21,7 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary;
|
|||
/**
|
||||
* An interface for Student's t-tests.
|
||||
*
|
||||
* @version $Revision: 1.4 $ $Date: 2004/05/24 05:29:05 $
|
||||
* @version $Revision: 1.5 $ $Date: 2004/06/02 13:08:55 $
|
||||
*/
|
||||
public interface TTest {
|
||||
|
||||
|
@ -62,14 +62,15 @@ public interface TTest {
|
|||
* value by 2.
|
||||
* <p>
|
||||
* This test is equivalent to a one-sample t-test computed using
|
||||
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample array
|
||||
* consisting of the signed differences between corresponding elements of
|
||||
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
|
||||
* array consisting of the signed differences between corresponding elements of
|
||||
* <code>sample1</code> and <code>sample2.</code>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The input array lengths must be the same and their common length must
|
||||
|
@ -86,7 +87,7 @@ public interface TTest {
|
|||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
* Performs a paired t-test</a> evaluating that null hypothesis that the
|
||||
* Performs a paired t-test evaluating the null hypothesis that the
|
||||
* mean of the paired differences between <code>sample1</code> and
|
||||
* <code>sample2</code> is 0 in favor of the two-sided alternative that the
|
||||
* mean paired difference is not equal to 0, with significance level
|
||||
|
@ -99,10 +100,12 @@ public interface TTest {
|
|||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The input array lengths must be the same and their common length must be at least 2.
|
||||
* <li>The input array lengths must be the same and their common length
|
||||
* must be at least 2.
|
||||
* </li>
|
||||
* <li> <code> 0 < alpha < 0.5 </code>
|
||||
* </li></ul>
|
||||
|
@ -138,8 +141,8 @@ public interface TTest {
|
|||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
|
||||
* t statistic </a> to use in comparing the dataset described by <code>sampleStats</code>
|
||||
* to <code>mu</code>.
|
||||
* t statistic </a> to use in comparing the mean of the dataset described by
|
||||
* <code>sampleStats</code> to <code>mu</code>.
|
||||
* <p>
|
||||
* This statistic can be used to perform a one sample t-test for the mean.
|
||||
* <p>
|
||||
|
@ -157,32 +160,73 @@ public interface TTest {
|
|||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* 2-sample t statistic </a>, without the assumption of equal sample variances.
|
||||
* 2-sample t statistic. </a>
|
||||
* <p>
|
||||
* This statistic can be used to perform a two-sample t-test to compare
|
||||
* sample means.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, the t-statistic is
|
||||
* <p>
|
||||
* (1) <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
|
||||
* <p>
|
||||
* where <strong><code>n1</code></strong> is the size of first sample;
|
||||
* <strong><code> n2</code></strong> is the size of second sample;
|
||||
* <strong><code> m1</code></strong> is the mean of first sample;
|
||||
* <strong><code> m2</code></strong> is the mean of second sample
|
||||
* and <strong><code>var</code></strong> is the pooled variance estimate:
|
||||
* <p>
|
||||
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
|
||||
* <p>
|
||||
* with <strong><code>var1</code></strong> the variance of the first sample and
|
||||
* <strong><code>var2</code></strong> the variance of the second sample.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>false</code>, the t-statistic is
|
||||
* <p>
|
||||
* (2) <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param equalVariances are the sample variances assumed equal?
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if the statistic cannot be computed due to a
|
||||
* convergence or other numerical error.
|
||||
*/
|
||||
double t(double[] sample1, double[] sample2)
|
||||
double t(double[] sample1, double[] sample2, boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* 2-sample t statistic </a>, comparing the means of the datasets described
|
||||
* by two {@link StatisticalSummary} instances without the assumption of equal sample variances.
|
||||
* by two {@link StatisticalSummary} instances.
|
||||
* <p>
|
||||
* This statistic can be used to perform a two-sample t-test to compare
|
||||
* sample means.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, the t-statistic is
|
||||
* <p>
|
||||
* (1) <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
|
||||
* <p>
|
||||
* where <strong><code>n1</code></strong> is the size of first sample;
|
||||
* <strong><code> n2</code></strong> is the size of second sample;
|
||||
* <strong><code> m1</code></strong> is the mean of first sample;
|
||||
* <strong><code> m2</code></strong> is the mean of second sample
|
||||
* and <strong><code>var</code></strong> is the pooled variance estimate:
|
||||
* <p>
|
||||
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
|
||||
* <p>
|
||||
* with <strong><code>var1</code></strong> the variance of the first sample and
|
||||
* <strong><code>var2</code></strong> the variance of the second sample.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>false</code>, the t-statistic is
|
||||
* <p>
|
||||
* (2) <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The datasets described by the two Univariates must each contain
|
||||
|
@ -191,10 +235,12 @@ public interface TTest {
|
|||
*
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @param equalVariances are the sample variances assumed equal?
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
*/
|
||||
double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
|
||||
double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException;
|
||||
|
||||
/**
|
||||
|
@ -345,15 +391,25 @@ public interface TTest {
|
|||
* equal in favor of the two-sided alternative that they are different.
|
||||
* For a one-sided test, divide the returned value by 2.
|
||||
* <p>
|
||||
* The test does not assume that the underlying popuation variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
|
@ -361,11 +417,12 @@ public interface TTest {
|
|||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
double tTest(double[] sample1, double[] sample2)
|
||||
double tTest(double[] sample1, double[] sample2, boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
|
@ -378,25 +435,36 @@ public interface TTest {
|
|||
* equal can be rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
|
||||
* the 95% level, use <br><code>tTest(sample1, sample2, 0.05) </code>
|
||||
* the 95% level, under the assumption of equal subpopulation variances,
|
||||
* use <br><code>tTest(sample1, sample2, 0.05, true) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
|
||||
* at the 99% level, first verify that the measured mean of
|
||||
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
|
||||
* and then use <br><code>tTest(sample1, sample2, 0.005) </code>
|
||||
* at the 99% level without assuming equal variances, first verify that the measured
|
||||
* mean of <code>sample 1</code> is less than the mean of <code>sample 2</code>
|
||||
* and then use <br><code>tTest(sample1, sample2, 0.005, false) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* The test does not assume that the underlying popuation variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
|
@ -407,12 +475,14 @@ public interface TTest {
|
|||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
boolean tTest(double[] sample1, double[] sample2, double alpha)
|
||||
boolean tTest(double[] sample1, double[] sample2, double alpha,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
|
@ -426,10 +496,19 @@ public interface TTest {
|
|||
* equal in favor of the two-sided alternative that they are different.
|
||||
* For a one-sided test, divide the returned value by 2.
|
||||
* <p>
|
||||
* The test does not assume that the underlying popuation variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
|
@ -441,13 +520,15 @@ public interface TTest {
|
|||
* at least 2 observations.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
|
||||
double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException;
|
||||
|
||||
/**
|
||||
|
@ -460,26 +541,37 @@ public interface TTest {
|
|||
* equal can be rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
|
||||
* the 95% level, use
|
||||
* <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
|
||||
* the 95% level under the assumption of equal subpopulation variances, use
|
||||
* <br><code>tTest(sampleStats1, sampleStats2, 0.05, true) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
|
||||
* at the 99% level, first verify that the measured mean of
|
||||
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
|
||||
* and then use <br><code>tTest(sampleStats1, sampleStats2, 0.005) </code>
|
||||
* at the 99% level without assuming that subpopulation variances are equal,
|
||||
* first verify that the measured mean of <code>sample 1</code> is less than
|
||||
* the mean of <code>sample 2</code> and then use
|
||||
* <br><code>tTest(sampleStats1, sampleStats2, 0.005, false) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* The test does not assume that the underlying popuation variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The datasets described by the two Univariates must each contain
|
||||
|
@ -491,12 +583,13 @@ public interface TTest {
|
|||
* @param sampleStats1 StatisticalSummary describing sample data values
|
||||
* @param sampleStats2 StatisticalSummary describing sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
double alpha)
|
||||
double alpha, boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException;
|
||||
}
|
||||
|
|
|
@ -15,8 +15,6 @@
|
|||
*/
|
||||
package org.apache.commons.math.stat.inference;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.distribution.DistributionFactory;
|
||||
import org.apache.commons.math.distribution.TDistribution;
|
||||
|
@ -26,36 +24,33 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary;
|
|||
/**
|
||||
* Implements t-test statistics defined in the {@link TTest} interface.
|
||||
*
|
||||
* @version $Revision: 1.4 $ $Date: 2004/06/01 00:44:24 $
|
||||
* @version $Revision: 1.5 $ $Date: 2004/06/02 13:08:55 $
|
||||
*/
|
||||
public class TTestImpl implements TTest, Serializable {
|
||||
public class TTestImpl implements TTest {
|
||||
|
||||
/** Serializable version identifier */
|
||||
static final long serialVersionUID = 3003851743922752186L;
|
||||
|
||||
public TTestImpl() {
|
||||
super();
|
||||
}
|
||||
|
||||
//----------------------------------------------- Protected methods
|
||||
|
||||
/**
|
||||
* Computes approximate degrees of freedom for 2-sample t-test.
|
||||
*
|
||||
* @param v1 first sample variance
|
||||
* @param v2 second sample variance
|
||||
* @param n1 first sample n
|
||||
* @param n2 second sample n
|
||||
* @return approximate degrees of freedom
|
||||
*/
|
||||
protected double df(double v1, double v2, double n1, double n2) {
|
||||
return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
|
||||
((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
|
||||
(n2 * n2 * (n2 - 1d)));
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.commons.math.stat.inference.TTest#pairedT(double[], double[])
|
||||
/**
|
||||
* Computes a paired, 2-sample t-statistic based on the data in the input
|
||||
* arrays. The t-statistic returned is equivalent to what would be returned by
|
||||
* computing the one-sample t-statistic {@link #t(double, double[])}, with
|
||||
* <code>mu = 0</code> and the sample array consisting of the (signed)
|
||||
* differences between corresponding entries in <code>sample1</code> and
|
||||
* <code>sample2.</code>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The input arrays must have the same length and their common length
|
||||
* must be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if the statistic cannot be computed due to a
|
||||
* convergence or other numerical error.
|
||||
*/
|
||||
public double pairedT(double[] sample1, double[] sample2)
|
||||
throws IllegalArgumentException, MathException {
|
||||
|
@ -69,8 +64,39 @@ public class TTestImpl implements TTest, Serializable {
|
|||
(double) sample1.length);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[])
|
||||
/**
|
||||
* Returns the <i>observed significance level</i>, or
|
||||
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a paired, two-sample, two-tailed t-test
|
||||
* based on the data in the input arrays.
|
||||
* <p>
|
||||
* The number returned is the smallest significance level
|
||||
* at which one can reject the null hypothesis that the mean of the paired
|
||||
* differences is 0 in favor of the two-sided alternative that the mean paired
|
||||
* difference is not equal to 0. For a one-sided test, divide the returned
|
||||
* value by 2.
|
||||
* <p>
|
||||
* This test is equivalent to a one-sample t-test computed using
|
||||
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
|
||||
* array consisting of the signed differences between corresponding elements of
|
||||
* <code>sample1</code> and <code>sample2.</code>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The input array lengths must be the same and their common length must
|
||||
* be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double pairedTTest(double[] sample1, double[] sample2)
|
||||
throws IllegalArgumentException, MathException {
|
||||
|
@ -80,8 +106,8 @@ public class TTestImpl implements TTest, Serializable {
|
|||
(double) sample1.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a paired t-test</a> evaluating that null hypothesis that the
|
||||
/**
|
||||
* Performs a paired t-test evaluating the null hypothesis that the
|
||||
* mean of the paired differences between <code>sample1</code> and
|
||||
* <code>sample2</code> is 0 in favor of the two-sided alternative that the
|
||||
* mean paired difference is not equal to 0, with significance level
|
||||
|
@ -121,34 +147,15 @@ public class TTestImpl implements TTest, Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes t test statistic for 1-sample t-test.
|
||||
*
|
||||
* @param m sample mean
|
||||
* @param mu constant to test against
|
||||
* @param v sample variance
|
||||
* @param n sample n
|
||||
* @return t test statistic
|
||||
*/
|
||||
protected double t(double m, double mu, double v, double n) {
|
||||
return (m - mu) / Math.sqrt(v / n);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes t test statistic for 2-sample t-test.
|
||||
*
|
||||
* @param m1 first sample mean
|
||||
* @param m2 second sample mean
|
||||
* @param v1 first sample variance
|
||||
* @param v2 second sample variance
|
||||
* @param n1 first sample n
|
||||
* @param n2 second sample n
|
||||
* @return t test statistic
|
||||
*/
|
||||
protected double t(double m1, double m2, double v1, double v2, double n1,double n2) {
|
||||
return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
|
||||
* t statistic </a> given observed values and a comparison constant.
|
||||
* <p>
|
||||
* This statistic can be used to perform a one sample t-test for the mean.
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array length must be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu comparison constant
|
||||
* @param observed array of values
|
||||
* @return t statistic
|
||||
|
@ -163,8 +170,18 @@ public class TTestImpl implements TTest, Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
|
||||
* t statistic </a> to use in comparing the mean of the dataset described by
|
||||
* <code>sampleStats</code> to <code>mu</code>.
|
||||
* <p>
|
||||
* This statistic can be used to perform a one sample t-test for the mean.
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li><code>observed.getN() > = 2</code>.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu comparison constant
|
||||
* @param sampleStats StatisticalSummary holding sample summary statistics
|
||||
* @param sampleStats DescriptiveStatistics holding sample summary statistics
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
*/
|
||||
|
@ -177,28 +194,96 @@ public class TTestImpl implements TTest, Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* 2-sample t statistic. </a>
|
||||
* <p>
|
||||
* This statistic can be used to perform a two-sample t-test to compare
|
||||
* sample means.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, the t-statistic is
|
||||
* <p>
|
||||
* (1) <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
|
||||
* <p>
|
||||
* where <strong><code>n1</code></strong> is the size of first sample;
|
||||
* <strong><code> n2</code></strong> is the size of second sample;
|
||||
* <strong><code> m1</code></strong> is the mean of first sample;
|
||||
* <strong><code> m2</code></strong> is the mean of second sample
|
||||
* and <strong><code>var</code></strong> is the pooled variance estimate:
|
||||
* <p>
|
||||
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
|
||||
* <p>
|
||||
* with <strong><code>var1</code></strong> the variance of the first sample and
|
||||
* <strong><code>var2</code></strong> the variance of the second sample.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>false</code>, the t-statistic is
|
||||
* <p>
|
||||
* (2) <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @return t-statistic
|
||||
* @param equalVariances are the sample variances assumed equal?
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if the statistic cannot be computed due to a
|
||||
* convergence or other numerical error.
|
||||
*/
|
||||
public double t(double[] sample1, double[] sample2)
|
||||
public double t(double[] sample1, double[] sample2, boolean equalVariances)
|
||||
throws IllegalArgumentException {
|
||||
if ((sample1 == null) || (sample2 == null ||
|
||||
Math.min(sample1.length, sample2.length) < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return t(StatUtils.mean(sample1), StatUtils.mean(sample2), StatUtils.variance(sample1),
|
||||
StatUtils.variance(sample2), (double) sample1.length, (double) sample2.length);
|
||||
StatUtils.variance(sample2), (double) sample1.length,
|
||||
(double) sample2.length, equalVariances);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* 2-sample t statistic </a>, comparing the means of the datasets described
|
||||
* by two {@link StatisticalSummary} instances.
|
||||
* <p>
|
||||
* This statistic can be used to perform a two-sample t-test to compare
|
||||
* sample means.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, the t-statistic is
|
||||
* <p>
|
||||
* (1) <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
|
||||
* <p>
|
||||
* where <strong><code>n1</code></strong> is the size of first sample;
|
||||
* <strong><code> n2</code></strong> is the size of second sample;
|
||||
* <strong><code> m1</code></strong> is the mean of first sample;
|
||||
* <strong><code> m2</code></strong> is the mean of second sample
|
||||
* and <strong><code>var</code></strong> is the pooled variance estimate:
|
||||
* <p>
|
||||
* <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
|
||||
* <p>
|
||||
* with <strong><code>var1</code></strong> the variance of the first sample and
|
||||
* <strong><code>var2</code></strong> the variance of the second sample.
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>false</code>, the t-statistic is
|
||||
* <p>
|
||||
* (2) <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The datasets described by the two Univariates must each contain
|
||||
* at least 2 observations.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @param equalVariances are the sample variances assumed equal?
|
||||
* @return t statistic
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
*/
|
||||
public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
|
||||
public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException {
|
||||
if ((sampleStats1 == null) ||
|
||||
(sampleStats2 == null ||
|
||||
|
@ -206,9 +291,459 @@ public class TTestImpl implements TTest, Serializable {
|
|||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return t(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
|
||||
sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN());
|
||||
sampleStats2.getVariance(), (double) sampleStats1.getN(),
|
||||
(double) sampleStats2.getN(), equalVariances);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <i>observed significance level</i>, or
|
||||
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a one-sample, two-tailed t-test
|
||||
* comparing the mean of the input array with the constant <code>mu</code>.
|
||||
* <p>
|
||||
* The number returned is the smallest significance level
|
||||
* at which one can reject the null hypothesis that the mean equals
|
||||
* <code>mu</code> in favor of the two-sided alternative that the mean
|
||||
* is different from <code>mu</code>. For a one-sided test, divide the
|
||||
* returned value by 2.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array length must be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sample array of sample data values
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double mu, double[] sample)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sample == null) || (sample.length < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), sample.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
|
||||
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
|
||||
* which <code>sample</code> is drawn equals <code>mu</code>.
|
||||
* <p>
|
||||
* Returns <code>true</code> iff the null hypothesis can be
|
||||
* rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
|
||||
* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
|
||||
* at the 99% level, first verify that the measured sample mean is less
|
||||
* than <code>mu</code> and then use
|
||||
* <br><code>tTest(mu, sample, 0.005) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the one-sample
|
||||
* parametric t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array length must be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sample array of sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @return true if the null hypothesis can be rejected with confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public boolean tTest(double mu, double[] sample, double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(mu, sample) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <i>observed significance level</i>, or
|
||||
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a one-sample, two-tailed t-test
|
||||
* comparing the mean of the dataset described by <code>sampleStats</code>
|
||||
* with the constant <code>mu</code>.
|
||||
* <p>
|
||||
* The number returned is the smallest significance level
|
||||
* at which one can reject the null hypothesis that the mean equals
|
||||
* <code>mu</code> in favor of the two-sided alternative that the mean
|
||||
* is different from <code>mu</code>. For a one-sided test, divide the
|
||||
* returned value by 2.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The sample must contain at least 2 observations.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sampleStats StatisticalSummary describing sample data
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double mu, StatisticalSummary sampleStats)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sampleStats == null) || (sampleStats.getN() < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
|
||||
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
|
||||
* which the dataset described by <code>stats</code> is drawn equals <code>mu</code>.
|
||||
* <p>
|
||||
* Returns <code>true</code> iff the null hypothesis can be
|
||||
* rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
|
||||
* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
|
||||
* at the 99% level, first verify that the measured sample mean is less
|
||||
* than <code>mu</code> and then use
|
||||
* <br><code>tTest(mu, sampleStats, 0.005) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the one-sample
|
||||
* parametric t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The sample must include at least 2 observations.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sampleStats StatisticalSummary describing sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @return true if the null hypothesis can be rejected with confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public boolean tTest( double mu, StatisticalSummary sampleStats, double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(mu, sampleStats) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <i>observed significance level</i>, or
|
||||
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a two-sample, two-tailed t-test
|
||||
* comparing the means of the input arrays.
|
||||
* <p>
|
||||
* The number returned is the smallest significance level
|
||||
* at which one can reject the null hypothesis that the two means are
|
||||
* equal in favor of the two-sided alternative that they are different.
|
||||
* For a one-sided test, divide the returned value by 2.
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double[] sample1, double[] sample2, boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sample1 == null) || (sample2 == null ||
|
||||
Math.min(sample1.length, sample2.length) < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data");
|
||||
}
|
||||
return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), StatUtils.variance(sample1),
|
||||
StatUtils.variance(sample2), (double) sample1.length,
|
||||
(double) sample2.length, equalVariances);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
|
||||
* two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
|
||||
* and <code>sample2</code> are drawn from populations with the same mean,
|
||||
* with significance level <code>alpha</code>.
|
||||
* <p>
|
||||
* Returns <code>true</code> iff the null hypothesis that the means are
|
||||
* equal can be rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
|
||||
* the 95% level, under the assumption of equal subpopulation variances,
|
||||
* use <br><code>tTest(sample1, sample2, 0.05, true) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
|
||||
* at the 99% level without assuming equal variances, first verify that the measured
|
||||
* mean of <code>sample 1</code> is less than the mean of <code>sample 2</code>
|
||||
* and then use <br><code>tTest(sample1, sample2, 0.005, false) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The observed array lengths must both be at least 2.
|
||||
* </li>
|
||||
* <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
public boolean tTest(double[] sample1, double[] sample2, double alpha,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(sample1, sample2, equalVariances) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <i>observed significance level</i>, or
|
||||
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
|
||||
* p-value</a>, associated with a two-sample, two-tailed t-test
|
||||
* comparing the means of the datasets described by two StatisticalSummary instances.
|
||||
* <p>
|
||||
* The number returned is the smallest significance level
|
||||
* at which one can reject the null hypothesis that the two means are
|
||||
* equal in favor of the two-sided alternative that they are different.
|
||||
* For a one-sided test, divide the returned value by 2.
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the p-value depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The datasets described by the two StatisticalSummary instances must each contain
|
||||
* at least 2 observations.
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sampleStats1 == null) || (sampleStats2 == null ||
|
||||
Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
|
||||
sampleStats2.getVariance(), (double) sampleStats1.getN(),
|
||||
(double) sampleStats2.getN(), equalVariances);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
|
||||
* two-sided t-test</a> evaluating the null hypothesis that <code>sampleStats1</code>
|
||||
* and <code>sampleStats2</code> describe datasets drawn from populations with the
|
||||
* same mean, with significance level <code>alpha</code>.
|
||||
* <p>
|
||||
* Returns <code>true</code> iff the null hypothesis that the means are
|
||||
* equal can be rejected with confidence <code>1 - alpha</code>. To
|
||||
* perform a 1-sided test, use <code>alpha / 2</code>
|
||||
* <p>
|
||||
* If the <code>equalVariances</code> parameter is <code>false,</code>
|
||||
* the test does not assume that the underlying population variances are
|
||||
* equal and it uses approximated degrees of freedom computed from the
|
||||
* sample data to compute the p-value. In this case, formula (2) for the
|
||||
* {@link #t(double[], double[], boolean)} statistic is used
|
||||
* and the Welch-Satterthwaite approximation to the degrees of freedom is used,
|
||||
* as described
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
|
||||
* here.</a>
|
||||
* <p>
|
||||
* If <code>equalVariances</code> is <code>true</code>, a pooled variance
|
||||
* estimate is used to compute the t-statistic (formula (1)) and the sum of the
|
||||
* sample sizes minus 2 is used as the degrees of freedom.
|
||||
* <p>
|
||||
* <strong>Examples:</strong><br><ol>
|
||||
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
|
||||
* the 95% level under the assumption of equal subpopulation variances, use
|
||||
* <br><code>tTest(sampleStats1, sampleStats2, 0.05, true) </code>
|
||||
* </li>
|
||||
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
|
||||
* at the 99% level without assuming that subpopulation variances are equal,
|
||||
* first verify that the measured mean of <code>sample 1</code> is less than
|
||||
* the mean of <code>sample 2</code> and then use
|
||||
* <br><code>tTest(sampleStats1, sampleStats2, 0.005, false) </code>
|
||||
* </li></ol>
|
||||
* <p>
|
||||
* <strong>Usage Note:</strong><br>
|
||||
* The validity of the test depends on the assumptions of the parametric
|
||||
* t-test procedure, as discussed
|
||||
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
|
||||
* here</a>
|
||||
* <p>
|
||||
* <strong>Preconditions</strong>: <ul>
|
||||
* <li>The datasets described by the two StatisticalSummary instances must each contain
|
||||
* at least 2 observations.
|
||||
* </li>
|
||||
* <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
|
||||
* </li></ul>
|
||||
*
|
||||
* @param sampleStats1 StatisticalSummary describing sample data values
|
||||
* @param sampleStats2 StatisticalSummary describing sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @param equalVariances are sample variances assumed to be equal?
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
double alpha, boolean equalVariances)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(sampleStats1, sampleStats2, equalVariances) < alpha);
|
||||
}
|
||||
|
||||
//----------------------------------------------- Protected methods
|
||||
|
||||
/**
|
||||
* Computes approximate degrees of freedom for 2-sample t-test.
|
||||
*
|
||||
* @param v1 first sample variance
|
||||
* @param v2 second sample variance
|
||||
* @param n1 first sample n
|
||||
* @param n2 second sample n
|
||||
* @return approximate degrees of freedom
|
||||
*/
|
||||
protected double df(double v1, double v2, double n1, double n2) {
|
||||
return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
|
||||
((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
|
||||
(n2 * n2 * (n2 - 1d)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes t test statistic for 1-sample t-test.
|
||||
*
|
||||
* @param m sample mean
|
||||
* @param mu constant to test against
|
||||
* @param v sample variance
|
||||
* @param n sample n
|
||||
* @return t test statistic
|
||||
*/
|
||||
protected double t(double m, double mu, double v, double n) {
|
||||
return (m - mu) / Math.sqrt(v / n);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes t test statistic for 2-sample t-test.
|
||||
* If equalVariance is true, the pooled variance
|
||||
* estimate is computed and used.
|
||||
*
|
||||
* @param m1 first sample mean
|
||||
* @param m2 second sample mean
|
||||
* @param v1 first sample variance
|
||||
* @param v2 second sample variance
|
||||
* @param n1 first sample n
|
||||
* @param n2 second sample n
|
||||
* @return t test statistic
|
||||
*/
|
||||
protected double t(double m1, double m2, double v1, double v2, double n1,
|
||||
double n2, boolean equalVariances) {
|
||||
if (equalVariances) {
|
||||
double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
|
||||
return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2));
|
||||
} else {
|
||||
return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes p-value for 2-sided, 1-sample t-test.
|
||||
*
|
||||
|
@ -229,6 +764,8 @@ public class TTestImpl implements TTest, Serializable {
|
|||
|
||||
/**
|
||||
* Computes p-value for 2-sided, 2-sample t-test.
|
||||
* If equalVariances is true, the sum of the sample sizes minus 2
|
||||
* is used as df; otherwise df is approximated from the data.
|
||||
*
|
||||
* @param m1 first sample mean
|
||||
* @param m2 second sample mean
|
||||
|
@ -236,147 +773,22 @@ public class TTestImpl implements TTest, Serializable {
|
|||
* @param v2 second sample variance
|
||||
* @param n1 first sample n
|
||||
* @param n2 second sample n
|
||||
* @param equalVariances are variances assumed equal?
|
||||
* @return p-value
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2)
|
||||
protected double tTest(double m1, double m2, double v1, double v2,
|
||||
double n1, double n2, boolean equalVariances)
|
||||
throws MathException {
|
||||
double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
|
||||
double t = Math.abs(t(m1, m2, v1, v2, n1, n2, equalVariances));
|
||||
double degreesOfFreedom = 0;
|
||||
if (equalVariances) {
|
||||
degreesOfFreedom = (double) (n1 + n2 - 2);
|
||||
} else {
|
||||
degreesOfFreedom= df(v1, v2, n1, n2);
|
||||
}
|
||||
TDistribution tDistribution =
|
||||
DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1, n2));
|
||||
DistributionFactory.newInstance().createTDistribution(degreesOfFreedom);
|
||||
return 1.0 - tDistribution.cumulativeProbability(-t, t);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sample array of sample data values
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double mu, double[] sample)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sample == null) || (sample.length < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), sample.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sample array of sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public boolean tTest(double mu, double[] sample, double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(mu, sample) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sampleStats StatisticalSummary describing sample data
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double mu, StatisticalSummary sampleStats)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sampleStats == null) || (sampleStats.getN() < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mu constant value to compare sample mean against
|
||||
* @param sampleStats StatisticalSummary describing sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @return p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(mu, sampleStats) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @return tTest p-value
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(double[] sample1, double[] sample2)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sample1 == null) || (sample2 == null ||
|
||||
Math.min(sample1.length, sample2.length) < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data");
|
||||
}
|
||||
return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), StatUtils.variance(sample1),
|
||||
StatUtils.variance(sample2), (double) sample1.length, (double) sample2.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sample1 array of sample data values
|
||||
* @param sample2 array of sample data values
|
||||
* @param alpha significance level
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
public boolean tTest(double[] sample1, double[] sample2, double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(sample1, sample2) < alpha);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sampleStats1 StatisticalSummary describing data from the first sample
|
||||
* @param sampleStats2 StatisticalSummary describing data from the second sample
|
||||
* @return p-value for t-test
|
||||
* @throws IllegalArgumentException if the precondition is not met
|
||||
* @throws MathException if an error occurs computing the p-value
|
||||
*/
|
||||
public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((sampleStats1 == null) || (sampleStats2 == null ||
|
||||
Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
|
||||
throw new IllegalArgumentException("insufficient data for t statistic");
|
||||
}
|
||||
return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
|
||||
sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sampleStats1 StatisticalSummary describing sample data values
|
||||
* @param sampleStats2 StatisticalSummary describing sample data values
|
||||
* @param alpha significance level of the test
|
||||
* @return true if the null hypothesis can be rejected with
|
||||
* confidence 1 - alpha
|
||||
* @throws IllegalArgumentException if the preconditions are not met
|
||||
* @throws MathException if an error occurs performing the test
|
||||
*/
|
||||
public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
|
||||
double alpha)
|
||||
throws IllegalArgumentException, MathException {
|
||||
if ((alpha <= 0) || (alpha > 0.5)) {
|
||||
throw new IllegalArgumentException("bad significance level: " + alpha);
|
||||
}
|
||||
return (tTest(sampleStats1, sampleStats2) < alpha);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,18 +23,27 @@ import org.apache.commons.math.stat.univariate.SummaryStatistics;
|
|||
/**
|
||||
* Test cases for the TTestImpl class.
|
||||
*
|
||||
* @version $Revision: 1.4 $ $Date: 2004/06/01 00:44:24 $
|
||||
* @version $Revision: 1.5 $ $Date: 2004/06/02 13:08:55 $
|
||||
*/
|
||||
|
||||
public final class TTestTest extends TestCase {
|
||||
|
||||
private TTestImpl testStatistic = new TTestImpl();
|
||||
|
||||
private double[] tooShortObs = { 1.0 };
|
||||
private double[] nullObserved = null;
|
||||
private double[] emptyObs = {};
|
||||
private SummaryStatistics emptyStats = SummaryStatistics.newInstance();
|
||||
private SummaryStatistics nullStats = null;
|
||||
SummaryStatistics tooShortStats = null;
|
||||
|
||||
public TTestTest(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public void setUp() {
|
||||
tooShortStats = SummaryStatistics.newInstance();
|
||||
tooShortStats.addValue(0d);
|
||||
}
|
||||
|
||||
public static Test suite() {
|
||||
|
@ -43,7 +52,7 @@ public final class TTestTest extends TestCase {
|
|||
return suite;
|
||||
}
|
||||
|
||||
public void testT() throws Exception {
|
||||
public void testOneSampleT() throws Exception {
|
||||
double[] observed =
|
||||
{93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0, 88.0, 98.0, 94.0, 101.0, 92.0, 95.0 };
|
||||
double mu = 100.0;
|
||||
|
@ -56,7 +65,6 @@ public final class TTestTest extends TestCase {
|
|||
assertEquals("t statistic", -2.82, testStatistic.t(mu, observed), 10E-3);
|
||||
assertEquals("t statistic", -2.82, testStatistic.t(mu, sampleStats), 10E-3);
|
||||
|
||||
double[] nullObserved = null;
|
||||
try {
|
||||
testStatistic.t(mu, nullObserved);
|
||||
fail("arguments too short, IllegalArgumentException expected");
|
||||
|
@ -64,7 +72,6 @@ public final class TTestTest extends TestCase {
|
|||
// expected
|
||||
}
|
||||
|
||||
SummaryStatistics nullStats = null;
|
||||
try {
|
||||
testStatistic.t(mu, nullStats);
|
||||
fail("arguments too short, IllegalArgumentException expected");
|
||||
|
@ -72,15 +79,13 @@ public final class TTestTest extends TestCase {
|
|||
// expected
|
||||
}
|
||||
|
||||
double[] emptyObs = {};
|
||||
try {
|
||||
testStatistic.t(mu, emptyObs);
|
||||
fail("arguments too short, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
SummaryStatistics emptyStats =SummaryStatistics.newInstance();
|
||||
|
||||
try {
|
||||
testStatistic.t(mu, emptyStats);
|
||||
fail("arguments too short, IllegalArgumentException expected");
|
||||
|
@ -88,7 +93,6 @@ public final class TTestTest extends TestCase {
|
|||
// expected
|
||||
}
|
||||
|
||||
double[] tooShortObs = { 1.0 };
|
||||
try {
|
||||
testStatistic.t(mu, tooShortObs);
|
||||
fail("insufficient data to compute t statistic, IllegalArgumentException expected");
|
||||
|
@ -102,8 +106,6 @@ public final class TTestTest extends TestCase {
|
|||
// expected
|
||||
}
|
||||
|
||||
SummaryStatistics tooShortStats = SummaryStatistics.newInstance();
|
||||
tooShortStats.addValue(0d);
|
||||
try {
|
||||
testStatistic.t(mu, tooShortStats);
|
||||
fail("insufficient data to compute t statistic, IllegalArgumentException expected");
|
||||
|
@ -116,7 +118,9 @@ public final class TTestTest extends TestCase {
|
|||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testOneSampleTTest() throws Exception {
|
||||
double[] oneSidedP =
|
||||
{2d, 0d, 6d, 6d, 3d, 3d, 2d, 3d, -6d, 6d, 6d, 6d, 3d, 0d, 1d, 1d, 0d, 2d, 3d, 3d };
|
||||
SummaryStatistics oneSidedPStats = SummaryStatistics.newInstance();
|
||||
|
@ -145,7 +149,10 @@ public final class TTestTest extends TestCase {
|
|||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testTwoSampleTHeterscedastic() throws Exception {
|
||||
double[] sample1 = { 7d, -4d, 18d, 17d, -3d, -5d, 1d, 10d, 11d, -2d };
|
||||
double[] sample2 = { -1d, 12d, -1d, -3d, 3d, -5d, 5d, 2d, -11d, -1d, -3d };
|
||||
SummaryStatistics sampleStats1 = SummaryStatistics.newInstance();
|
||||
|
@ -158,80 +165,109 @@ public final class TTestTest extends TestCase {
|
|||
}
|
||||
|
||||
// Target comparison values computed using R version 1.8.1 (Linux version)
|
||||
assertEquals("two sample t stat", 1.6037, testStatistic.t(sample1, sample2), 10E-4);
|
||||
assertEquals("two sample t stat", 1.6037, testStatistic.t(sampleStats1, sampleStats2), 10E-4);
|
||||
assertEquals("two sample p value", 0.0644, testStatistic.tTest(sample1, sample2) / 2d, 10E-4);
|
||||
assertEquals("two sample p value", 0.0644, testStatistic.tTest(sampleStats1, sampleStats2) / 2d, 10E-4);
|
||||
|
||||
assertTrue("two sample t-test reject", testStatistic.tTest(sample1, sample2, 0.2));
|
||||
assertTrue("two sample t-test reject", testStatistic.tTest(sampleStats1, sampleStats2, 0.2));
|
||||
assertTrue("two sample t-test accept", !testStatistic.tTest(sample1, sample2, 0.1));
|
||||
assertTrue("two sample t-test accept", !testStatistic.tTest(sampleStats1, sampleStats2, 0.1));
|
||||
assertEquals("two sample heteroscedastic t stat", 1.603717,
|
||||
testStatistic.t(sample1, sample2, false), 1E-6);
|
||||
assertEquals("two sample heteroscedastic t stat", 1.603717,
|
||||
testStatistic.t(sampleStats1, sampleStats2, false), 1E-6);
|
||||
assertEquals("two sample heteroscedastic p value", 0.1288394,
|
||||
testStatistic.tTest(sample1, sample2, false), 1E-7);
|
||||
assertEquals("two sample heteroscedastic p value", 0.1288394,
|
||||
testStatistic.tTest(sampleStats1, sampleStats2, false), 1E-7);
|
||||
assertTrue("two sample heteroscedastic t-test reject",
|
||||
testStatistic.tTest(sample1, sample2, 0.2, false));
|
||||
assertTrue("two sample heteroscedastic t-test reject",
|
||||
testStatistic.tTest(sampleStats1, sampleStats2, 0.2, false));
|
||||
assertTrue("two sample heteroscedastic t-test accept",
|
||||
!testStatistic.tTest(sample1, sample2, 0.1, false));
|
||||
assertTrue("two sample heteroscedastic t-test accept",
|
||||
!testStatistic.tTest(sampleStats1, sampleStats2, 0.1, false));
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sample1, sample2, 95);
|
||||
testStatistic.tTest(sample1, sample2, .95, false);
|
||||
fail("alpha out of range, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sampleStats1, sampleStats2, 95);
|
||||
testStatistic.tTest(sampleStats1, sampleStats2, .95, false);
|
||||
fail("alpha out of range, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sample1, tooShortObs, .01);
|
||||
testStatistic.tTest(sample1, tooShortObs, .01, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sampleStats1, tooShortStats, .01);
|
||||
testStatistic.tTest(sampleStats1, tooShortStats, .01, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sample1, tooShortObs);
|
||||
testStatistic.tTest(sample1, tooShortObs, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.tTest(sampleStats1, tooShortStats);
|
||||
testStatistic.tTest(sampleStats1, tooShortStats, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.t(sample1, tooShortObs);
|
||||
testStatistic.t(sample1, tooShortObs, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
testStatistic.t(sampleStats1, tooShortStats);
|
||||
testStatistic.t(sampleStats1, tooShortStats, false);
|
||||
fail("insufficient data, IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
public void testTwoSampleTHomoscedastic() throws Exception {
|
||||
double[] sample1 ={2, 4, 6, 8, 10};
|
||||
double[] sample2 = {4, 6, 8, 10, 16};
|
||||
SummaryStatistics sampleStats1 = SummaryStatistics.newInstance();
|
||||
for (int i = 0; i < sample1.length; i++) {
|
||||
sampleStats1.addValue(sample1[i]);
|
||||
}
|
||||
SummaryStatistics sampleStats2 = SummaryStatistics.newInstance();
|
||||
for (int i = 0; i < sample2.length; i++) {
|
||||
sampleStats2.addValue(sample2[i]);
|
||||
}
|
||||
|
||||
// Target comparison values computed using R version 1.8.1 (Linux version)
|
||||
assertEquals("two sample homoscedastic t stat", -1.120897,
|
||||
testStatistic.t(sample1, sample2, true), 10E-6);
|
||||
assertEquals("two sample homoscedastic p value", 0.2948490,
|
||||
testStatistic.tTest(sampleStats1, sampleStats2, true), 1E-6);
|
||||
assertTrue("two sample homoscedastic t-test reject",
|
||||
testStatistic.tTest(sample1, sample2, 0.3, true));
|
||||
assertTrue("two sample homoscedastic t-test accept",
|
||||
!testStatistic.tTest(sample1, sample2, 0.2, true));
|
||||
}
|
||||
|
||||
public void testSmallSamples() throws Exception {
|
||||
double[] sample1 = {1d, 3d};
|
||||
double[] sample2 = {4d, 5d};
|
||||
|
||||
// Target values computed using R, version 1.8.1 (Linux version)
|
||||
assertEquals(-2.2361, testStatistic.t(sample1, sample2), 1E-4);
|
||||
assertEquals(0.1987, testStatistic.tTest(sample1, sample2), 1E-4);
|
||||
assertEquals(-2.2361, testStatistic.t(sample1, sample2, false), 1E-4);
|
||||
assertEquals(0.1987, testStatistic.tTest(sample1, sample2, false), 1E-4);
|
||||
}
|
||||
|
||||
public void testPaired() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue