Added support for paired t-tests.

PR #29049
Reported by: Joel Freyss


git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141243 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2004-05-24 05:29:05 +00:00
parent 6c07bd13a8
commit 403e4e4956
2 changed files with 480 additions and 347 deletions

View File

@ -21,9 +21,103 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary;
/** /**
* An interface for Student's t-tests. * An interface for Student's t-tests.
* *
* @version $Revision: 1.3 $ $Date: 2004/05/23 05:45:11 $ * @version $Revision: 1.4 $ $Date: 2004/05/24 05:29:05 $
*/ */
public interface TTest { public interface TTest {
/**
* Computes a paired, 2-sample t-statistic based on the data in the input
* arrays. The t-statistic returned is equivalent to what would be returned by
* computing the one-sample t-statistic {@link #t(double, double[])}, with
* <code>mu = 0</code> and the sample array consisting of the (signed)
* differences between corresponding entries in <code>sample1</code> and
* <code>sample2.</code>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The input arrays must have the same length and their common length
* must be at least 2.
* </li></ul>
*
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if the statistic can not be computed due to a
* convergence or other numerical error.
*/
double pairedT(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a paired, two-sample, two-tailed t-test
* based on the data in the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean of the paired
* differences is 0 in favor of the two-sided alternative that the mean paired
* difference is not equal to 0. For a one-sided test, divide the returned
* value by 2.
* <p>
* This test is equivalent to a one-sample t-test computed using
* {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample array
* consisting of the signed differences between corresponding elements of
* <code>sample1</code> and <code>sample2.</code>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the p-value depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The input array lengths must be the same and their common length must
* be at least 2.
* </li></ul>
*
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @return p-value for t-test
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
double pairedTTest(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
* Performs a paired t-test evaluating the null hypothesis that the
* mean of the paired differences between <code>sample1</code> and
* <code>sample2</code> is 0 in favor of the two-sided alternative that the
* mean paired difference is not equal to 0, with significance level
* <code>alpha</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis can be rejected with
* confidence <code>1 - alpha</code>. To perform a 1-sided test, use
* <code>alpha * 2</code>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The input array lengths must be the same and their common length must be at least 2.
* </li>
* <li> <code> 0 &lt; alpha &lt; 0.5 </code>
* </li></ul>
*
* @param sample1 array of sample data values
* @param sample2 array of sample data values
* @param alpha significance level of the test
* @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
*/
boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
throws IllegalArgumentException, MathException;
/** /**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> given observed values and a comparison constant. * t statistic </a> given observed values and a comparison constant.
@ -42,6 +136,25 @@ public interface TTest {
double t(double mu, double[] observed) double t(double mu, double[] observed)
throws IllegalArgumentException; throws IllegalArgumentException;
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> to use in comparing the dataset described by <code>sampleStats</code>
* to <code>mu</code>.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
* <p>
* <strong>Preconditions</strong>: <ul>
* <li><code>sampleStats.getN() &gt;= 2</code>.
* </li></ul>
*
* @param mu comparison constant
* @param sampleStats StatisticalSummary holding sample summary statistics
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
double t(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException;
/** /**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* 2-sample t statistic </a>, without the assumption of equal sample variances. * 2-sample t statistic </a>, without the assumption of equal sample variances.
@ -63,6 +176,164 @@ public interface TTest {
double t(double[] sample1, double[] sample2) double t(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException; throws IllegalArgumentException, MathException;
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* 2-sample t statistic </a>, comparing the means of the datasets described
* by two {@link StatisticalSummary} instances without the assumption of equal sample variances.
* <p>
* This statistic can be used to perform a two-sample t-test to compare
* sample means.
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The datasets described by the two StatisticalSummary instances must
* each contain at least 2 observations.
* </li></ul>
*
* @param sampleStats1 StatisticalSummary describing data from the first sample
* @param sampleStats2 StatisticalSummary describing data from the second sample
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
throws IllegalArgumentException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a one-sample, two-tailed t-test
* comparing the mean of the input array with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
* is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array length must be at least 2.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sample array of sample data values
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
double tTest(double mu, double[] sample)
throws IllegalArgumentException, MathException;
/**
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which <code>sample</code> is drawn equals <code>mu</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis can be
* rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code>
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
* at the 99% level, first verify that the measured sample mean is less
* than <code>mu</code> and then use
* <br><code>tTest(mu, sample, 0.02) </code>
* </li></ol>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the one-sample
* parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array length must be at least 2.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sample array of sample data values
* @param alpha significance level of the test
* @return true if the null hypothesis can be rejected with confidence 1 - alpha
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
boolean tTest(double mu, double[] sample, double alpha)
throws IllegalArgumentException, MathException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a one-sample, two-tailed t-test
* comparing the mean of the dataset described by <code>sampleStats</code>
* with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
* is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The sample must contain at least 2 observations.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sampleStats StatisticalSummary describing sample data
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
double tTest(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException, MathException;
/**
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which the dataset described by <code>sampleStats</code> is drawn equals <code>mu</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis can be
* rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha * 2</code>
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
* at the 99% level, first verify that the measured sample mean is less
* than <code>mu</code> and then use
* <br><code>tTest(mu, sampleStats, 0.02) </code>
* </li></ol>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the one-sample
* parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The sample must include at least 2 observations.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sampleStats StatisticalSummary describing sample data values
* @param alpha significance level of the test
* @return true if the null hypothesis can be rejected with confidence 1 - alpha
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
throws IllegalArgumentException, MathException;
/** /**
* Returns the <i>observed significance level</i>, or * Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
@ -144,114 +415,6 @@ public interface TTest {
boolean tTest(double[] sample1, double[] sample2, double alpha) boolean tTest(double[] sample1, double[] sample2, double alpha)
throws IllegalArgumentException, MathException; throws IllegalArgumentException, MathException;
/**
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which <code>sample</code> is drawn equals <code>mu</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis can be
* rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha / 2</code>
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
* the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
* at the 99% level, first verify that the measured sample mean is less
* than <code>mu</code> and then use
* <br><code>tTest(mu, sample, 0.005) </code>
* </li></ol>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the one-sample
* parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array length must be at least 2.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sample array of sample data values
* @param alpha significance level of the test
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error computing the p-value
*/
boolean tTest(double mu, double[] sample, double alpha)
throws IllegalArgumentException, MathException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a one-sample, two-tailed t-test
* comparing the mean of the input array with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
* is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The observed array length must be at least 2.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sample array of sample data values
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
double tTest(double mu, double[] sample)
throws IllegalArgumentException, MathException;
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> to use in comparing the dataset described by <code>sampleStats</code>
* to <code>mu</code>.
* <p>
* This statistic can be used to perform a one sample t-test for the mean.
* <p>
* <strong>Preconditions</strong>: <ul>
* <li><code>observed.getN() > = 2</code>.
* </li></ul>
*
* @param mu comparison constant
* @param sampleStats DescriptiveStatistics holding sample summary statitstics
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
double t(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException;
/**
* Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* 2-sample t statistic </a>, comparing the means of the datasets described
* by two {@link StatisticalSummary} instances without the assumption of equal sample variances.
* <p>
* This statistic can be used to perform a two-sample t-test to compare
* sample means.
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The datasets described by the two Univariates must each contain
* at least 2 observations.
* </li></ul>
*
* @param sampleStats1 StatisticalSummary describing data from the first sample
* @param sampleStats2 StatisticalSummary describing data from the second sample
* @return t statistic
* @throws IllegalArgumentException if the precondition is not met
*/
double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
throws IllegalArgumentException;
/** /**
* Returns the <i>observed significance level</i>, or * Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
@ -336,73 +499,4 @@ public interface TTest {
boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2, boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
double alpha) double alpha)
throws IllegalArgumentException, MathException; throws IllegalArgumentException, MathException;
/**
* Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
* two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
* which the dataset described by <code>stats</code> is drawn equals <code>mu</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis can be
* rejected with confidence <code>1 - alpha</code>. To
* perform a 1-sided test, use <code>alpha / 2</code>
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
* the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
* at the 99% level, first verify that the measured sample mean is less
* than <code>mu</code> and then use
* <br><code>tTest(mu, sampleStats, 0.005) </code>
* </li></ol>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the one-sample
* parametric t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The sample must include at least 2 observations.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sampleStats StatisticalSummary describing sample data values
* @param alpha significance level of the test
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
throws IllegalArgumentException, MathException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a one-sample, two-tailed t-test
* comparing the mean of the dataset described by <code>sampleStats</code>
* with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean equals
* <code>mu</code> in favor of the two-sided alternative that the mean
* is different from <code>mu</code>. For a one-sided test, divide the
* returned value by 2.
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
* t-test procedure, as discussed
* <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
* <li>The sample must contain at least 2 observations.
* </li></ul>
*
* @param mu constant value to compare sample mean against
* @param sampleStats StatisticalSummary describing sample data
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
double tTest(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException, MathException;
} }

View File

@ -26,7 +26,7 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary;
/** /**
* Implements t-test statistics defined in the {@link TTest} interface. * Implements t-test statistics defined in the {@link TTest} interface.
* *
* @version $Revision: 1.2 $ $Date: 2004/05/23 05:04:48 $ * @version $Revision: 1.3 $ $Date: 2004/05/24 05:29:05 $
*/ */
public class TTestImpl implements TTest, Serializable { public class TTestImpl implements TTest, Serializable {
@ -37,6 +37,89 @@ public class TTestImpl implements TTest, Serializable {
super(); super();
} }
//----------------------------------------------- Protected methods
/**
 * Computes the approximate degrees of freedom used by the 2-sample t-test
 * when the two sample variances are not assumed to be equal
 * (Welch-Satterthwaite approximation).
 *
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return approximate degrees of freedom
 */
protected double df(double v1, double v2, double n1, double n2) {
    // Sum of the per-sample variances of the mean: v1/n1 + v2/n2.
    final double scaledVarianceSum = (v1 / n1) + (v2 / n2);
    final double numerator = scaledVarianceSum * scaledVarianceSum;
    final double denominator = (v1 * v1) / (n1 * n1 * (n1 - 1d))
        + (v2 * v2) / (n2 * n2 * (n2 - 1d));
    return numerator / denominator;
}
/**
 * Computes the paired, 2-sample t-statistic for the input arrays by
 * evaluating the 1-sample t-statistic of the pairwise differences against
 * a hypothesized mean of 0.
 * <p>
 * The mean and variance of the differences are obtained from
 * <code>StatUtils.meanDifference</code> and
 * <code>StatUtils.varianceDifference</code>.
 *
 * @param sample1 array of sample data values
 * @param sample2 array of sample data values
 * @return t statistic
 * @throws IllegalArgumentException if either array is null, has fewer than
 * 2 elements, or the arrays have different lengths
 * @throws MathException if the statistic can not be computed due to a
 * convergence or other numerical error
 * @see org.apache.commons.math.stat.inference.TTest#pairedT(double[], double[])
 */
public double pairedT(double[] sample1, double[] sample2)
    throws IllegalArgumentException, MathException {
    if ((sample1 == null) || (sample2 == null)
        || (Math.min(sample1.length, sample2.length) < 2)) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    // Pairing is only meaningful when every entry has a partner; enforce the
    // documented precondition here rather than relying on the helpers.
    if (sample1.length != sample2.length) {
        throw new IllegalArgumentException("input arrays must have the same length");
    }
    double meanDifference = StatUtils.meanDifference(sample1, sample2);
    return t(meanDifference, 0,
        StatUtils.varianceDifference(sample1, sample2, meanDifference),
        (double) sample1.length);
}
/**
 * Returns the p-value of a paired, 2-sample, 2-sided t-test by running the
 * 1-sample test on the pairwise differences against a hypothesized mean
 * of 0.
 * <p>
 * The mean and variance of the differences are obtained from
 * <code>StatUtils.meanDifference</code> and
 * <code>StatUtils.varianceDifference</code>.
 *
 * @param sample1 array of sample data values
 * @param sample2 array of sample data values
 * @return p-value for t-test
 * @throws IllegalArgumentException if either array is null, has fewer than
 * 2 elements, or the arrays have different lengths
 * @throws MathException if an error occurs computing the p-value
 * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[])
 */
public double pairedTTest(double[] sample1, double[] sample2)
    throws IllegalArgumentException, MathException {
    // Validate up front, matching pairedT, so bad input surfaces as the
    // documented IllegalArgumentException instead of a NullPointerException.
    if ((sample1 == null) || (sample2 == null)
        || (Math.min(sample1.length, sample2.length) < 2)) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (sample1.length != sample2.length) {
        throw new IllegalArgumentException("input arrays must have the same length");
    }
    double meanDifference = StatUtils.meanDifference(sample1, sample2);
    return tTest(meanDifference, 0,
        StatUtils.varianceDifference(sample1, sample2, meanDifference),
        (double) sample1.length);
}
/**
 * Performs a paired, 2-sided t-test at significance level
 * <code>alpha</code>: the null hypothesis is rejected when the p-value
 * returned by {@link #pairedTTest(double[], double[])} is strictly less
 * than <code>alpha</code>.
 *
 * @param sample1 array of sample data values
 * @param sample2 array of sample data values
 * @param alpha significance level of the test
 * @return true if the null hypothesis can be rejected with
 * confidence 1 - alpha
 * @throws IllegalArgumentException if <code>alpha</code> is not in
 * <code>(0, 0.5]</code> or the p-value computation rejects the samples
 * @throws MathException if an error occurs performing the test
 * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[], double)
 */
public boolean pairedTTest(
    double[] sample1,
    double[] sample2,
    double alpha)
    throws IllegalArgumentException, MathException {
    // Same significance-level bounds and message as the other
    // alpha-taking tTest overloads in this class.
    if ((alpha <= 0) || (alpha > 0.5)) {
        throw new IllegalArgumentException("bad significance level: " + alpha);
    }
    return (pairedTTest(sample1, sample2) < alpha);
}
/**
 * Computes the 1-sample t test statistic: the distance between the sample
 * mean and the reference constant, divided by <code>sqrt(v / n)</code>.
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return t test statistic
 */
protected double t(double m, double mu, double v, double n) {
    final double standardError = Math.sqrt(v / n);
    return (m - mu) / standardError;
}
/**
 * Computes the 2-sample t test statistic: the difference of the sample
 * means divided by <code>sqrt(v1 / n1 + v2 / n2)</code>.
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return t test statistic
 */
protected double t(double m1, double m2, double v1, double v2, double n1, double n2) {
    final double meanGap = m1 - m2;
    final double standardError = Math.sqrt((v1 / n1) + (v2 / n2));
    return meanGap / standardError;
}
/** /**
* @param mu comparison constant * @param mu comparison constant
* @param observed array of values * @param observed array of values
@ -51,6 +134,106 @@ public class TTestImpl implements TTest, Serializable {
return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), observed.length); return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), observed.length);
} }
/**
 * Computes the 1-sample t statistic for the dataset summarized by
 * <code>sampleStats</code>, compared against <code>mu</code>.
 *
 * @param mu comparison constant
 * @param sampleStats StatisticalSummary holding sample summary statistics
 * @return t statistic
 * @throws IllegalArgumentException if <code>sampleStats</code> is null or
 * describes fewer than 2 observations
 */
public double t(double mu, StatisticalSummary sampleStats)
    throws IllegalArgumentException {
    if (sampleStats == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (sampleStats.getN() < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    return t(sampleMean, mu, sampleVariance, sampleStats.getN());
}
/**
 * Computes the 2-sample t statistic for two data arrays from their means,
 * variances and lengths.
 *
 * @param sample1 array of sample data values
 * @param sample2 array of sample data values
 * @return t-statistic
 * @throws IllegalArgumentException if either array is null or has fewer
 * than 2 elements
 */
public double t(double[] sample1, double[] sample2)
    throws IllegalArgumentException {
    if (sample1 == null || sample2 == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (sample1.length < 2 || sample2.length < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double mean1 = StatUtils.mean(sample1);
    final double mean2 = StatUtils.mean(sample2);
    final double variance1 = StatUtils.variance(sample1);
    final double variance2 = StatUtils.variance(sample2);
    return t(mean1, mean2, variance1, variance2,
        (double) sample1.length, (double) sample2.length);
}
/**
 * Computes the 2-sample t statistic for the datasets summarized by the two
 * StatisticalSummary arguments, from their means, variances and sizes.
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws IllegalArgumentException if either summary is null or describes
 * fewer than 2 observations
 */
public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
    throws IllegalArgumentException {
    if (sampleStats1 == null || sampleStats2 == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    // The smaller of the two sample sizes decides whether there is enough data.
    if (Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    return t(mean1, mean2, variance1, variance2,
        (double) sampleStats1.getN(), (double) sampleStats2.getN());
}
/**
 * Computes the p-value for a 2-sided, 1-sample t-test.
 * <p>
 * The p-value is one minus the probability that a t distribution with
 * <code>n - 1</code> degrees of freedom assigns to the interval
 * <code>[-|t|, |t|]</code>, where <code>t</code> is the 1-sample statistic.
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m, double mu, double v, double n)
    throws MathException {
    final double absoluteT = Math.abs(t(m, mu, v, n));
    final TDistribution distribution =
        DistributionFactory.newInstance().createTDistribution(n - 1);
    final double centralMass = distribution.cumulativeProbability(-absoluteT, absoluteT);
    return 1.0 - centralMass;
}
/**
 * Computes the p-value for a 2-sided, 2-sample t-test.
 * <p>
 * The p-value is one minus the probability that a t distribution, with
 * degrees of freedom given by <code>df(v1, v2, n1, n2)</code>, assigns to
 * the interval <code>[-|t|, |t|]</code>, where <code>t</code> is the
 * 2-sample statistic.
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2)
    throws MathException {
    final double absoluteT = Math.abs(t(m1, m2, v1, v2, n1, n2));
    final TDistribution distribution =
        DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1, n2));
    final double centralMass = distribution.cumulativeProbability(-absoluteT, absoluteT);
    return 1.0 - centralMass;
}
/**
 * Returns the p-value of a 2-sided, 1-sample t-test comparing the mean of
 * <code>sample</code> with the constant <code>mu</code>.
 *
 * @param mu constant value to compare sample mean against
 * @param sample array of sample data values
 * @return p-value
 * @throws IllegalArgumentException if <code>sample</code> is null or has
 * fewer than 2 elements
 * @throws MathException if an error occurs computing the p-value
 */
public double tTest(double mu, double[] sample)
    throws IllegalArgumentException, MathException {
    if (sample == null || sample.length < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double sampleMean = StatUtils.mean(sample);
    final double sampleVariance = StatUtils.variance(sample);
    return tTest(sampleMean, mu, sampleVariance, sample.length);
}
/** /**
* @param mu constant value to compare sample mean against * @param mu constant value to compare sample mean against
* @param sample array of sample data values * @param sample array of sample data values
@ -68,19 +251,34 @@ public class TTestImpl implements TTest, Serializable {
} }
/** /**
* @param sample1 array of sample data values * @param mu constant value to compare sample mean against
* @param sample2 array of sample data values * @param sampleStats StatisticalSummary describing sample data
* @return t-statistic * @return p-value
* @throws IllegalArgumentException if the precondition is not met * @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/ */
public double t(double[] sample1, double[] sample2) public double tTest(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException { throws IllegalArgumentException, MathException {
if ((sample1 == null) || (sample2 == null || if ((sampleStats == null) || (sampleStats.getN() < 2)) {
Math.min(sample1.length, sample2.length) < 2)) {
throw new IllegalArgumentException("insufficient data for t statistic"); throw new IllegalArgumentException("insufficient data for t statistic");
} }
return t(StatUtils.mean(sample1), StatUtils.mean(sample2), StatUtils.variance(sample1), return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());
StatUtils.variance(sample2), (double) sample1.length, (double) sample2.length); }
/**
* @param mu constant value to compare sample mean against
* @param sampleStats StatisticalSummary describing sample data values
* @param alpha significance level of the test
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha)
throws IllegalArgumentException, MathException {
if ((alpha <= 0) || (alpha > 0.5)) {
throw new IllegalArgumentException("bad significance level: " + alpha);
}
return (tTest(mu, sampleStats) < alpha);
} }
/** /**
@ -118,52 +316,6 @@ public class TTestImpl implements TTest, Serializable {
return (tTest(sample1, sample2) < alpha); return (tTest(sample1, sample2) < alpha);
} }
/**
* @param mu constant value to compare sample mean against
* @param sample array of sample data values
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
public double tTest(double mu, double[] sample)
throws IllegalArgumentException, MathException {
if ((sample == null) || (sample.length < 2)) {
throw new IllegalArgumentException("insufficient data for t statistic");
}
return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), sample.length);
}
/**
 * Computes the one-sample t statistic for the sample described by
 * <code>sampleStats</code> against the comparison constant <code>mu</code>,
 * using the summary's mean, variance and number of observations.
 *
 * @param mu comparison constant
 * @param sampleStats StatisticalSummary holding sample summary statistics
 * @return t statistic
 * @throws IllegalArgumentException if <code>sampleStats</code> is null or
 * reports fewer than 2 observations
 */
public double t(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException {
    if (sampleStats == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (sampleStats.getN() < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final double sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}
/**
 * Computes the two-sample t statistic from the means, variances and sample
 * sizes reported by the two summaries.
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws IllegalArgumentException if either summary is null or the smaller
 * of the two sample sizes is less than 2
 */
public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
throws IllegalArgumentException {
    if (sampleStats1 == null || sampleStats2 == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final double size1 = (double) sampleStats1.getN();
    final double size2 = (double) sampleStats2.getN();
    return t(mean1, mean2, variance1, variance2, size1, size2);
}
/** /**
* @param sampleStats1 StatisticalSummary describing data from the first sample * @param sampleStats1 StatisticalSummary describing data from the first sample
* @param sampleStats2 StatisticalSummary describing data from the second sample * @param sampleStats2 StatisticalSummary describing data from the second sample
@ -199,117 +351,4 @@ public class TTestImpl implements TTest, Serializable {
return (tTest(sampleStats1, sampleStats2) < alpha); return (tTest(sampleStats1, sampleStats2) < alpha);
} }
/**
 * Decides a one-sample t-test at significance level <code>alpha</code>: the
 * p-value from <code>tTest(mu, sampleStats)</code> is compared against
 * <code>alpha</code>.
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data values
 * @param alpha significance level of the test; must satisfy
 * <code>0 &lt; alpha &lt;= 0.5</code>
 * @return <code>true</code> if the p-value is strictly less than
 * <code>alpha</code>, <code>false</code> otherwise
 * @throws IllegalArgumentException if <code>alpha</code> is not positive or
 * is greater than 0.5, or if the p-value computation rejects
 * <code>sampleStats</code>
 * @throws MathException if an error occurs computing the p-value
 */
public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha)
throws IllegalArgumentException, MathException {
    // Keep the two comparisons un-negated so a NaN alpha is treated exactly
    // as before (neither comparison is true for NaN).
    final boolean notPositive = alpha <= 0;
    final boolean aboveHalf = alpha > 0.5;
    if (notPositive || aboveHalf) {
        throw new IllegalArgumentException("bad significance level: " + alpha);
    }
    final double pValue = tTest(mu, sampleStats);
    return pValue < alpha;
}
/**
 * Computes the p-value of a one-sample t-test comparing the mean reported by
 * <code>sampleStats</code> against the constant <code>mu</code>.
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data
 * @return p-value
 * @throws IllegalArgumentException if <code>sampleStats</code> is null or
 * reports fewer than 2 observations
 * @throws MathException if an error occurs computing the p-value
 */
public double tTest(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException, MathException {
    if (sampleStats == null) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    if (sampleStats.getN() < 2) {
        throw new IllegalArgumentException("insufficient data for t statistic");
    }
    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final double sampleSize = sampleStats.getN();
    return tTest(sampleMean, mu, sampleVariance, sampleSize);
}
//----------------------------------------------- Protected methods
/**
 * Computes approximate degrees of freedom for 2-sample t-test.
 * <p>
 * The value returned is
 * <code>(v1/n1 + v2/n2)^2 / (v1^2 / (n1^2 (n1 - 1)) + v2^2 / (n2^2 (n2 - 1)))</code>.
 *
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return approximate degrees of freedom
 */
protected double df(double v1, double v2, double n1, double n2) {
    final double varianceOfDifference = (v1 / n1) + (v2 / n2);
    final double numerator = varianceOfDifference * varianceOfDifference;
    final double firstTerm = (v1 * v1) / (n1 * n1 * (n1 - 1d));
    final double secondTerm = (v2 * v2) / (n2 * n2 * (n2 - 1d));
    return numerator / (firstTerm + secondTerm);
}
/**
 * Computes t test statistic for 2-sample t-test:
 * <code>(m1 - m2) / sqrt(v1/n1 + v2/n2)</code>.
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return t test statistic
 */
protected double t(double m1, double m2, double v1, double v2, double n1,double n2) {
    final double standardError = Math.sqrt((v1 / n1) + (v2 / n2));
    final double meanDifference = m1 - m2;
    return meanDifference / standardError;
}
/**
 * Computes t test statistic for 1-sample t-test:
 * <code>(m - mu) / sqrt(v / n)</code>.
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return t test statistic
 */
protected double t(double m, double mu, double v, double n) {
    final double standardError = Math.sqrt(v / n);
    final double deviation = m - mu;
    return deviation / standardError;
}
/**
 * Computes p-value for 2-sided, 2-sample t-test.
 * <p>
 * The p-value is evaluated as twice the lower-tail probability at
 * <code>-|t|</code>. Because the t distribution is symmetric about zero this
 * equals <code>1 - P(-|t| &lt; T &lt; |t|)</code>, but it avoids subtracting
 * a value close to 1 from 1, which would lose all significant digits for
 * very small p-values.
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2)
throws MathException {
    double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
    TDistribution tDistribution =
        DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1, n2));
    // Two-sided p-value = 2 * P(T <= -|t|); computed from the tail directly
    // instead of as 1 - (central mass) to avoid catastrophic cancellation.
    return 2.0 * tDistribution.cumulativeProbability(-t);
}
/**
 * Computes p-value for 2-sided, 1-sample t-test.
 * <p>
 * The p-value is evaluated as twice the lower-tail probability at
 * <code>-|t|</code> of a t distribution with <code>n - 1</code> degrees of
 * freedom. Because the t distribution is symmetric about zero this equals
 * <code>1 - P(-|t| &lt; T &lt; |t|)</code>, but it avoids subtracting a
 * value close to 1 from 1, which would lose all significant digits for very
 * small p-values.
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m, double mu, double v, double n)
throws MathException {
    double t = Math.abs(t(m, mu, v, n));
    TDistribution tDistribution =
        DistributionFactory.newInstance().createTDistribution(n - 1);
    // Two-sided p-value = 2 * P(T <= -|t|); computed from the tail directly
    // instead of as 1 - (central mass) to avoid catastrophic cancellation.
    return 2.0 * tDistribution.cumulativeProbability(-t);
}
} }