From 403e4e49562bcce4b8efd95278696f40eb3a429a Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Mon, 24 May 2004 05:29:05 +0000 Subject: [PATCH] Added support for paired t-tests. PR #29049 Reported by: Joel Freyss git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141243 13f79535-47bb-0310-9956-ffa450edef68 --- .../commons/math/stat/inference/TTest.java | 450 +++++++++++------- .../math/stat/inference/TTestImpl.java | 377 ++++++++------- 2 files changed, 480 insertions(+), 347 deletions(-) diff --git a/src/java/org/apache/commons/math/stat/inference/TTest.java b/src/java/org/apache/commons/math/stat/inference/TTest.java index 1b340e2ad..10a5c1307 100644 --- a/src/java/org/apache/commons/math/stat/inference/TTest.java +++ b/src/java/org/apache/commons/math/stat/inference/TTest.java @@ -21,9 +21,103 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary; /** * An interface for Student's t-tests. * - * @version $Revision: 1.3 $ $Date: 2004/05/23 05:45:11 $ + * @version $Revision: 1.4 $ $Date: 2004/05/24 05:29:05 $ */ public interface TTest { + + + /** + * Computes a paired, 2-sample t-statistic based on the data in the input + * arrays. The t-statistic returned is equivalent to what would be returned by + * computing the one-sample t-statistic {@link #t(double, double[])}, with + * mu = 0 and the sample array consisting of the (signed) + * differences between corresponding entries in sample1 and + * sample2. + *

+ * Preconditions:

+ * + * @param sample1 array of sample data values + * @param sample2 array of sample data values + * @return t statistic + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if the statistic cannot be computed due to a + * convergence or other numerical error. + */ + double pairedT(double[] sample1, double[] sample2) + throws IllegalArgumentException, MathException; + + /** + * Returns the observed significance level, or + * + * p-value, associated with a paired, two-sample, two-tailed t-test + * based on the data in the input arrays. + *

+ * The number returned is the smallest significance level + * at which one can reject the null hypothesis that the mean of the paired + * differences is 0 in favor of the two-sided alternative that the mean paired + * difference is not equal to 0. For a one-sided test, divide the returned + * value by 2. + *

+ * This test is equivalent to a one-sample t-test computed using + * {@link #tTest(double, double[])} with mu = 0 and the sample array + * consisting of the signed differences between corresponding elements of + * sample1 and sample2. + *

+ * Usage Note:
+ * The validity of the p-value depends on the assumptions of the parametric + * t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param sample1 array of sample data values + * @param sample2 array of sample data values + * @return p-value for t-test + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + double pairedTTest(double[] sample1, double[] sample2) + throws IllegalArgumentException, MathException; + + /** + * Performs a paired t-test evaluating the null hypothesis that the + * mean of the paired differences between sample1 and + * sample2 is 0 in favor of the two-sided alternative that the + * mean paired difference is not equal to 0, with significance level + * alpha. + *

+ * Returns true iff the null hypothesis can be rejected with + * confidence 1 - alpha. To perform a 1-sided test, use + * alpha / 2 + *

+ * Usage Note:
+ * The validity of the test depends on the assumptions of the parametric + * t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param sample1 array of sample data values + * @param sample2 array of sample data values + * @param alpha significance level of the test + * @return true if the null hypothesis can be rejected with + * confidence 1 - alpha + * @throws IllegalArgumentException if the preconditions are not met + * @throws MathException if an error occurs performing the test + */ + boolean pairedTTest(double[] sample1, double[] sample2, double alpha) + throws IllegalArgumentException, MathException; + /** * Computes a * t statistic given observed values and a comparison constant. @@ -42,6 +136,25 @@ public interface TTest { double t(double mu, double[] observed) throws IllegalArgumentException; + /** + * Computes a + * t statistic to use in comparing the dataset described by sampleStats + * to mu. + *

+ * This statistic can be used to perform a one sample t-test for the mean. + *

+ * Preconditions:

+ * + * @param mu comparison constant + * @param sampleStats StatisticalSummary holding sample summary statistics + * @return t statistic + * @throws IllegalArgumentException if the precondition is not met + */ + double t(double mu, StatisticalSummary sampleStats) + throws IllegalArgumentException; + /** * Computes a * 2-sample t statistic , without the assumption of equal sample variances. @@ -63,6 +176,164 @@ public interface TTest { double t(double[] sample1, double[] sample2) throws IllegalArgumentException, MathException; + /** + * Computes a + * 2-sample t statistic , comparing the means of the datasets described + * by two {@link StatisticalSummary} instances without the assumption of equal sample variances. + *

+ * This statistic can be used to perform a two-sample t-test to compare + * sample means. + *

+ * Preconditions:

+ * + * @param sampleStats1 StatisticalSummary describing data from the first sample + * @param sampleStats2 StatisticalSummary describing data from the second sample + * @return t statistic + * @throws IllegalArgumentException if the precondition is not met + */ + double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) + throws IllegalArgumentException; + + /** + * Returns the observed significance level, or + * + * p-value, associated with a one-sample, two-tailed t-test + * comparing the mean of the input array with the constant mu. + *

+ * The number returned is the smallest significance level + * at which one can reject the null hypothesis that the mean equals + * mu in favor of the two-sided alternative that the mean + * is different from mu. For a one-sided test, divide the + * returned value by 2. + *

+ * Usage Note:
+ * The validity of the test depends on the assumptions of the parametric + * t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param mu constant value to compare sample mean against + * @param sample array of sample data values + * @return p-value + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + double tTest(double mu, double[] sample) + throws IllegalArgumentException, MathException; + + /** + * Performs a + * two-sided t-test evaluating the null hypothesis that the mean of the population from + * which sample is drawn equals mu. + *

+ * Returns true iff the null hypothesis can be + * rejected with confidence 1 - alpha. To + * perform a 1-sided test, use alpha / 2 + *

+ * Examples:

    + *
  1. To test the (2-sided) hypothesis sample mean = mu at + * the 95% level, use
    tTest(mu, sample, 0.05) + *
  2. + *
  3. To test the (one-sided) hypothesis sample mean < mu + * at the 99% level, first verify that the measured sample mean is less + * than mu and then use + *
    tTest(mu, sample, 0.005) + *
+ *

+ * Usage Note:
+ * The validity of the test depends on the assumptions of the one-sample + * parametric t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param mu constant value to compare sample mean against + * @param sample array of sample data values + * @param alpha significance level of the test + * @return true if the null hypothesis can be rejected with confidence 1 - alpha + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + boolean tTest(double mu, double[] sample, double alpha) + throws IllegalArgumentException, MathException; + + /** + * Returns the observed significance level, or + * + * p-value, associated with a one-sample, two-tailed t-test + * comparing the mean of the dataset described by sampleStats + * with the constant mu. + *

+ * The number returned is the smallest significance level + * at which one can reject the null hypothesis that the mean equals + * mu in favor of the two-sided alternative that the mean + * is different from mu. For a one-sided test, divide the + * returned value by 2. + *

+ * Usage Note:
+ * The validity of the test depends on the assumptions of the parametric + * t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param mu constant value to compare sample mean against + * @param sampleStats StatisticalSummary describing sample data + * @return p-value + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + double tTest(double mu, StatisticalSummary sampleStats) + throws IllegalArgumentException, MathException; + + /** + * Performs a + * two-sided t-test evaluating the null hypothesis that the mean of the population from + * which the dataset described by stats is drawn equals mu. + *

+ * Returns true iff the null hypothesis can be + * rejected with confidence 1 - alpha. To + * perform a 1-sided test, use alpha / 2 + *

+ * Examples:

    + *
  1. To test the (2-sided) hypothesis sample mean = mu at + * the 95% level, use
    tTest(mu, sampleStats, 0.05) + *
  2. + *
  3. To test the (one-sided) hypothesis sample mean < mu + * at the 99% level, first verify that the measured sample mean is less + * than mu and then use + *
    tTest(mu, sampleStats, 0.005) + *
+ *

+ * Usage Note:
+ * The validity of the test depends on the assumptions of the one-sample + * parametric t-test procedure, as discussed + * here + *

+ * Preconditions:

+ * + * @param mu constant value to compare sample mean against + * @param sampleStats StatisticalSummary describing sample data values + * @param alpha significance level of the test + * @return true if the null hypothesis can be rejected with confidence 1 - alpha + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + boolean tTest(double mu, StatisticalSummary sampleStats, double alpha) + throws IllegalArgumentException, MathException; + /** * Returns the observed significance level, or * @@ -144,114 +415,6 @@ public interface TTest { boolean tTest(double[] sample1, double[] sample2, double alpha) throws IllegalArgumentException, MathException; - /** - * Performs a - * two-sided t-test evaluating the null hypothesis that the mean of the population from - * which sample is drawn equals mu. - *

- * Returns true iff the null hypothesis can be - * rejected with confidence 1 - alpha. To - * perform a 1-sided test, use alpha / 2 - *

- * Examples:

    - *
  1. To test the (2-sided) hypothesis sample mean = mu at - * the 95% level, use
    tTest(mu, sample, 0.05) - *
  2. - *
  3. To test the (one-sided) hypothesis sample mean < mu - * at the 99% level, first verify that the measured sample mean is less - * than mu and then use - *
    tTest(mu, sample, 0.005) - *
- *

- * Usage Note:
- * The validity of the test depends on the assumptions of the one-sample - * parametric t-test procedure, as discussed - * here - *

- * Preconditions:

- * - * @param mu constant value to compare sample mean against - * @param sample array of sample data values - * @param alpha significance level of the test - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error computing the p-value - */ - boolean tTest(double mu, double[] sample, double alpha) - throws IllegalArgumentException, MathException; - - /** - * Returns the observed significance level, or - * - * p-value, associated with a one-sample, two-tailed t-test - * comparing the mean of the input array with the constant mu. - *

- * The number returned is the smallest significance level - * at which one can reject the null hypothesis that the mean equals - * mu in favor of the two-sided alternative that the mean - * is different from mu. For a one-sided test, divide the - * returned value by 2. - *

- * Usage Note:
- * The validity of the test depends on the assumptions of the parametric - * t-test procedure, as discussed - * here - *

- * Preconditions:

- * - * @param mu constant value to compare sample mean against - * @param sample array of sample data values - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - double tTest(double mu, double[] sample) - throws IllegalArgumentException, MathException; - - /** - * Computes a - * t statistic to use in comparing the dataset described by sampleStats - * to mu. - *

- * This statistic can be used to perform a one sample t-test for the mean. - *

- * Preconditions:

- * - * @param mu comparison constant - * @param sampleStats DescriptiveStatistics holding sample summary statitstics - * @return t statistic - * @throws IllegalArgumentException if the precondition is not met - */ - double t(double mu, StatisticalSummary sampleStats) - throws IllegalArgumentException; - - /** - * Computes a - * 2-sample t statistic , comparing the means of the datasets described - * by two {@link StatisticalSummary} instances without the assumption of equal sample variances. - *

- * This statistic can be used to perform a two-sample t-test to compare - * sample means. - *

- * Preconditions:

- * - * @param sampleStats1 StatisticalSummary describing data from the first sample - * @param sampleStats2 StatisticalSummary describing data from the second sample - * @return t statistic - * @throws IllegalArgumentException if the precondition is not met - */ - double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) - throws IllegalArgumentException; - /** * Returns the observed significance level, or * @@ -336,73 +499,4 @@ public interface TTest { boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2, double alpha) throws IllegalArgumentException, MathException; - - /** - * Performs a - * two-sided t-test evaluating the null hypothesis that the mean of the population from - * which the dataset described by stats is drawn equals mu. - *

- * Returns true iff the null hypothesis can be - * rejected with confidence 1 - alpha. To - * perform a 1-sided test, use alpha / 2 - *

- * Examples:

    - *
  1. To test the (2-sided) hypothesis sample mean = mu at - * the 95% level, use
    tTest(mu, sampleStats, 0.05) - *
  2. - *
  3. To test the (one-sided) hypothesis sample mean < mu - * at the 99% level, first verify that the measured sample mean is less - * than mu and then use - *
    tTest(mu, sampleStats, 0.005) - *
- *

- * Usage Note:
- * The validity of the test depends on the assumptions of the one-sample - * parametric t-test procedure, as discussed - * here - *

- * Preconditions:

- * - * @param mu constant value to compare sample mean against - * @param sampleStats StatisticalSummary describing sample data values - * @param alpha significance level of the test - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - boolean tTest(double mu, StatisticalSummary sampleStats, double alpha) - throws IllegalArgumentException, MathException; - - /** - * Returns the observed significance level, or - * - * p-value, associated with a one-sample, two-tailed t-test - * comparing the mean of the dataset described by sampleStats - * with the constant mu. - *

- * The number returned is the smallest significance level - * at which one can reject the null hypothesis that the mean equals - * mu in favor of the two-sided alternative that the mean - * is different from mu. For a one-sided test, divide the - * returned value by 2. - *

- * Usage Note:
- * The validity of the test depends on the assumptions of the parametric - * t-test procedure, as discussed - * here - *

- * Preconditions:

- * - * @param mu constant value to compare sample mean against - * @param sampleStats StatisticalSummary describing sample data - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - double tTest(double mu, StatisticalSummary sampleStats) - throws IllegalArgumentException, MathException; } diff --git a/src/java/org/apache/commons/math/stat/inference/TTestImpl.java b/src/java/org/apache/commons/math/stat/inference/TTestImpl.java index d4f5d24bb..8030d6f8f 100644 --- a/src/java/org/apache/commons/math/stat/inference/TTestImpl.java +++ b/src/java/org/apache/commons/math/stat/inference/TTestImpl.java @@ -26,7 +26,7 @@ import org.apache.commons.math.stat.univariate.StatisticalSummary; /** * Implements t-test statistics defined in the {@link TTest} interface. * - * @version $Revision: 1.2 $ $Date: 2004/05/23 05:04:48 $ + * @version $Revision: 1.3 $ $Date: 2004/05/24 05:29:05 $ */ public class TTestImpl implements TTest, Serializable { @@ -37,6 +37,89 @@ public class TTestImpl implements TTest, Serializable { super(); } + //----------------------------------------------- Protected methods + + /** + * Computes approximate degrees of freedom for 2-sample t-test. 
+     *
+     * @param v1 first sample variance
+     * @param v2 second sample variance
+     * @param n1 first sample n
+     * @param n2 second sample n
+     * @return approximate degrees of freedom
+     */
+    protected double df(double v1, double v2, double n1, double n2) {
+        return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
+        ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
+                (n2 * n2 * (n2 - 1d)));
+    }
+
+    /**
+     * Computes the paired, 2-sample t statistic: the 1-sample t statistic of
+     * the differences between corresponding entries of the two arrays,
+     * tested against a mean of 0.
+     *
+     * @param sample1 array of sample data values
+     * @param sample2 array of sample data values
+     * @return t statistic
+     * @throws IllegalArgumentException if either array is null, either holds
+     * fewer than 2 values, or the arrays differ in length
+     * @throws MathException if the statistic cannot be computed
+     * @see org.apache.commons.math.stat.inference.TTest#pairedT(double[], double[])
+     */
+    public double pairedT(double[] sample1, double[] sample2)
+        throws IllegalArgumentException, MathException {
+        checkPairedSamples(sample1, sample2);
+        double meanDifference = StatUtils.meanDifference(sample1, sample2);
+        return t(meanDifference, 0,
+                StatUtils.varianceDifference(sample1, sample2, meanDifference),
+                (double) sample1.length);
+    }
+
+    /**
+     * Computes the p-value of the paired, two-sided t-test: the 1-sample
+     * p-value of the differences between corresponding entries of the two
+     * arrays, tested against a mean of 0.
+     *
+     * @param sample1 array of sample data values
+     * @param sample2 array of sample data values
+     * @return p-value for t-test
+     * @throws IllegalArgumentException if either array is null, either holds
+     * fewer than 2 values, or the arrays differ in length
+     * @throws MathException if an error occurs computing the p-value
+     * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[])
+     */
+    public double pairedTTest(double[] sample1, double[] sample2)
+        throws IllegalArgumentException, MathException {
+        // Same guard as pairedT, so bad input is reported as
+        // IllegalArgumentException instead of surfacing as a NullPointerException.
+        checkPairedSamples(sample1, sample2);
+        double meanDifference = StatUtils.meanDifference(sample1, sample2);
+        return tTest(meanDifference, 0,
+                StatUtils.varianceDifference(sample1, sample2, meanDifference),
+                (double) sample1.length);
+    }
+
+    /**
+     * Performs the paired, two-sided t-test at significance level
+     * <code>alpha</code>.
+     *
+     * @param sample1 array of sample data values
+     * @param sample2 array of sample data values
+     * @param alpha significance level of the test
+     * @return true if the paired p-value is strictly less than alpha
+     * @throws IllegalArgumentException if alpha is not in (0, 0.5] or the
+     * samples are rejected by {@link #pairedTTest(double[], double[])}
+     * @throws MathException if an error occurs performing the test
+     * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[], double)
+     */
+    public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
+        throws IllegalArgumentException, MathException {
+        // Same bound and message as the other tTest(..., alpha) methods in this class.
+        if ((alpha <= 0) || (alpha > 0.5)) {
+            throw new IllegalArgumentException("bad significance level: " + alpha);
+        }
+        return (pairedTTest(sample1, sample2) < alpha);
+    }
+
+    /**
+     * Verifies that two arrays can be treated as paired samples.
+     *
+     * @param sample1 first sample
+     * @param sample2 second sample
+     * @throws IllegalArgumentException if either array is null, either holds
+     * fewer than 2 values, or the arrays differ in length
+     */
+    private void checkPairedSamples(double[] sample1, double[] sample2) {
+        if ((sample1 == null) || (sample2 == null ||
+                Math.min(sample1.length, sample2.length) < 2)) {
+            throw new IllegalArgumentException("insufficient data for t statistic");
+        }
+        // n is taken from sample1.length alone, so a length mismatch would
+        // mis-state the sample size. StatUtils may reject it as well (not
+        // visible from here); checking up front keeps the failure explicit.
+        if (sample1.length != sample2.length) {
+            throw new IllegalArgumentException("paired samples must have the same length");
+        }
+    }
+
+    /**
+     * Computes t test statistic for 1-sample t-test.
+ * + * @param m sample mean + * @param mu constant to test against + * @param v sample variance + * @param n sample n + * @return t test statistic + */ + protected double t(double m, double mu, double v, double n) { + return (m - mu) / Math.sqrt(v / n); + } + + /** + * Computes t test statistic for 2-sample t-test. + * + * @param m1 first sample mean + * @param m2 second sample mean + * @param v1 first sample variance + * @param v2 second sample variance + * @param n1 first sample n + * @param n2 second sample n + * @return t test statistic + */ + protected double t(double m1, double m2, double v1, double v2, double n1,double n2) { + return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2)); + } + /** * @param mu comparison constant * @param observed array of values @@ -51,6 +134,106 @@ public class TTestImpl implements TTest, Serializable { return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), observed.length); } + /** + * @param mu comparison constant + * @param sampleStats StatisticalSummary holding sample summary statitstics + * @return t statistic + * @throws IllegalArgumentException if the precondition is not met + */ + public double t(double mu, StatisticalSummary sampleStats) + throws IllegalArgumentException { + if ((sampleStats == null) || (sampleStats.getN() < 2)) { + throw new IllegalArgumentException("insufficient data for t statistic"); + } + return t(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN()); + } + + /** + * @param sample1 array of sample data values + * @param sample2 array of sample data values + * @return t-statistic + * @throws IllegalArgumentException if the precondition is not met + */ + public double t(double[] sample1, double[] sample2) + throws IllegalArgumentException { + if ((sample1 == null) || (sample2 == null || + Math.min(sample1.length, sample2.length) < 2)) { + throw new IllegalArgumentException("insufficient data for t statistic"); + } + return t(StatUtils.mean(sample1), 
StatUtils.mean(sample2), StatUtils.variance(sample1), + StatUtils.variance(sample2), (double) sample1.length, (double) sample2.length); + } + + /** + * @param sampleStats1 StatisticalSummary describing data from the first sample + * @param sampleStats2 StatisticalSummary describing data from the second sample + * @return t statistic + * @throws IllegalArgumentException if the precondition is not met + */ + public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) + throws IllegalArgumentException { + if ((sampleStats1 == null) || + (sampleStats2 == null || + Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { + throw new IllegalArgumentException("insufficient data for t statistic"); + } + return t(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(), + sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN()); + } + + /** + * Computes p-value for 2-sided, 1-sample t-test. + * + * @param m sample mean + * @param mu constant to test against + * @param v sample variance + * @param n sample n + * @return p-value + * @throws MathException if an error occurs computing the p-value + */ + protected double tTest(double m, double mu, double v, double n) + throws MathException { + double t = Math.abs(t(m, mu, v, n)); + TDistribution tDistribution = + DistributionFactory.newInstance().createTDistribution(n - 1); + return 1.0 - tDistribution.cumulativeProbability(-t, t); + } + + /** + * Computes p-value for 2-sided, 2-sample t-test. 
+ * + * @param m1 first sample mean + * @param m2 second sample mean + * @param v1 first sample variance + * @param v2 second sample variance + * @param n1 first sample n + * @param n2 second sample n + * @return p-value + * @throws MathException if an error occurs computing the p-value + */ + protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2) + throws MathException { + double t = Math.abs(t(m1, m2, v1, v2, n1, n2)); + TDistribution tDistribution = + DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1, n2)); + return 1.0 - tDistribution.cumulativeProbability(-t, t); + } + + /** + * @param mu constant value to compare sample mean against + * @param sample array of sample data values + * @return p-value + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + public double tTest(double mu, double[] sample) + throws IllegalArgumentException, MathException { + if ((sample == null) || (sample.length < 2)) { + throw new IllegalArgumentException("insufficient data for t statistic"); + } + return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), sample.length); + } + /** * @param mu constant value to compare sample mean against * @param sample array of sample data values @@ -68,19 +251,34 @@ public class TTestImpl implements TTest, Serializable { } /** - * @param sample1 array of sample data values - * @param sample2 array of sample data values - * @return t-statistic + * @param mu constant value to compare sample mean against + * @param sampleStats StatisticalSummary describing sample data + * @return p-value * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value */ - public double t(double[] sample1, double[] sample2) - throws IllegalArgumentException { - if ((sample1 == null) || (sample2 == null || - Math.min(sample1.length, sample2.length) < 
2)) { + public double tTest(double mu, StatisticalSummary sampleStats) + throws IllegalArgumentException, MathException { + if ((sampleStats == null) || (sampleStats.getN() < 2)) { throw new IllegalArgumentException("insufficient data for t statistic"); } - return t(StatUtils.mean(sample1), StatUtils.mean(sample2), StatUtils.variance(sample1), - StatUtils.variance(sample2), (double) sample1.length, (double) sample2.length); + return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN()); + } + + /** + * @param mu constant value to compare sample mean against + * @param sampleStats StatisticalSummary describing sample data values + * @param alpha significance level of the test + * @return p-value + * @throws IllegalArgumentException if the precondition is not met + * @throws MathException if an error occurs computing the p-value + */ + public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha) + throws IllegalArgumentException, MathException { + if ((alpha <= 0) || (alpha > 0.5)) { + throw new IllegalArgumentException("bad significance level: " + alpha); + } + return (tTest(mu, sampleStats) < alpha); } /** @@ -118,52 +316,6 @@ public class TTestImpl implements TTest, Serializable { return (tTest(sample1, sample2) < alpha); } - /** - * @param mu constant value to compare sample mean against - * @param sample array of sample data values - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - public double tTest(double mu, double[] sample) - throws IllegalArgumentException, MathException { - if ((sample == null) || (sample.length < 2)) { - throw new IllegalArgumentException("insufficient data for t statistic"); - } - return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), sample.length); - } - - /** - * @param mu comparison constant - * @param sampleStats StatisticalSummary holding sample summary statitstics 
- * @return t statistic - * @throws IllegalArgumentException if the precondition is not met - */ - public double t(double mu, StatisticalSummary sampleStats) - throws IllegalArgumentException { - if ((sampleStats == null) || (sampleStats.getN() < 2)) { - throw new IllegalArgumentException("insufficient data for t statistic"); - } - return t(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN()); - } - - /** - * @param sampleStats1 StatisticalSummary describing data from the first sample - * @param sampleStats2 StatisticalSummary describing data from the second sample - * @return t statistic - * @throws IllegalArgumentException if the precondition is not met - */ - public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) - throws IllegalArgumentException { - if ((sampleStats1 == null) || - (sampleStats2 == null || - Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { - throw new IllegalArgumentException("insufficient data for t statistic"); - } - return t(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(), - sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN()); - } - /** * @param sampleStats1 StatisticalSummary describing data from the first sample * @param sampleStats2 StatisticalSummary describing data from the second sample @@ -199,117 +351,4 @@ public class TTestImpl implements TTest, Serializable { return (tTest(sampleStats1, sampleStats2) < alpha); } - /** - * @param mu constant value to compare sample mean against - * @param sampleStats StatisticalSummary describing sample data values - * @param alpha significance level of the test - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha) - throws IllegalArgumentException, MathException { - if ((alpha <= 0) || (alpha > 
0.5)) { - throw new IllegalArgumentException("bad significance level: " + alpha); - } - return (tTest(mu, sampleStats) < alpha); - } - - /** - * @param mu constant value to compare sample mean against - * @param sampleStats StatisticalSummary describing sample data - * @return p-value - * @throws IllegalArgumentException if the precondition is not met - * @throws MathException if an error occurs computing the p-value - */ - public double tTest(double mu, StatisticalSummary sampleStats) - throws IllegalArgumentException, MathException { - if ((sampleStats == null) || (sampleStats.getN() < 2)) { - throw new IllegalArgumentException("insufficient data for t statistic"); - } - return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN()); - } - - //----------------------------------------------- Protected methods - - /** - * Computes approximate degrees of freedom for 2-sample t-test. - * - * @param v1 first sample variance - * @param v2 second sample variance - * @param n1 first sample n - * @param n2 second sample n - * @return approximate degrees of freedom - */ - protected double df(double v1, double v2, double n1, double n2) { - return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / - ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / - (n2 * n2 * (n2 - 1d))); - } - - /** - * Computes t test statistic for 2-sample t-test. - * - * @param m1 first sample mean - * @param m2 second sample mean - * @param v1 first sample variance - * @param v2 second sample variance - * @param n1 first sample n - * @param n2 second sample n - * @return t test statistic - */ - protected double t(double m1, double m2, double v1, double v2, double n1,double n2) { - return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2)); - } - - /** - * Computes t test statistic for 1-sample t-test. 
- * - * @param m sample mean - * @param mu constant to test against - * @param v sample variance - * @param n sample n - * @return t test statistic - */ - protected double t(double m, double mu, double v, double n) { - return (m - mu) / Math.sqrt(v / n); - } - - /** - * Computes p-value for 2-sided, 2-sample t-test. - * - * @param m1 first sample mean - * @param m2 second sample mean - * @param v1 first sample variance - * @param v2 second sample variance - * @param n1 first sample n - * @param n2 second sample n - * @return p-value - * @throws MathException if an error occurs computing the p-value - */ - protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2) - throws MathException { - double t = Math.abs(t(m1, m2, v1, v2, n1, n2)); - TDistribution tDistribution = - DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1, n2)); - return 1.0 - tDistribution.cumulativeProbability(-t, t); - } - - /** - * Computes p-value for 2-sided, 1-sample t-test. - * - * @param m sample mean - * @param mu constant to test against - * @param v sample variance - * @param n sample n - * @return p-value - * @throws MathException if an error occurs computing the p-value - */ - protected double tTest(double m, double mu, double v, double n) - throws MathException { - double t = Math.abs(t(m, mu, v, n)); - TDistribution tDistribution = - DistributionFactory.newInstance().createTDistribution(n - 1); - return 1.0 - tDistribution.cumulativeProbability(-t, t); - } }