From 5e7fe90154176753fb2d77fddac2bd5035ec7ba6 Mon Sep 17 00:00:00 2001 From: "Mark R. Diggory" Date: Sat, 21 Jun 2003 23:00:39 +0000 Subject: [PATCH] Moving TestStatistic implementation / interface into stat package. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140938 13f79535-47bb-0310-9956-ffa450edef68 --- .../commons/math/stat/TestStatistic.java | 106 +++++++++++++ .../commons/math/stat/TestStatisticImpl.java | 133 ++++++++++++++++ .../apache/commons/math/RandomDataTest.java | 3 +- .../commons/math/stat/TestStatisticTest.java | 145 ++++++++++++++++++ 4 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 src/java/org/apache/commons/math/stat/TestStatistic.java create mode 100644 src/java/org/apache/commons/math/stat/TestStatisticImpl.java create mode 100644 src/test/org/apache/commons/math/stat/TestStatisticTest.java diff --git a/src/java/org/apache/commons/math/stat/TestStatistic.java b/src/java/org/apache/commons/math/stat/TestStatistic.java new file mode 100644 index 000000000..c3736fa8a --- /dev/null +++ b/src/java/org/apache/commons/math/stat/TestStatistic.java @@ -0,0 +1,106 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . 
+ */ +package org.apache.commons.math.stat; + +/** + * Interfaces for the following test statistics + * + * @author Phil Steitz + * @version $Revision: 1.1 $ $Date: 2003/06/21 23:00:39 $ + * + */ +public interface TestStatistic { + + /** + * Description: + * Computes Chi-Square statistic given observed and expected frequency counts
+ * This statistic can be used to perform Chi-Square tests for goodness + * of fit.
+ * Definition: + * http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm
+ * Preconditions: + * @param observed array of observed frequency counts + * @param expected array of expected frequency counts + * @throws IllegalArgumentException if input arrays have different lengths + * or length is less than 2 + */ + public double chiSquare(double[] expected, double[] observed); + + /** + * Description: + * Computes one sample, t-test statistic given observed values
+ * This statistic can be used to perform one sample tests for means.
+ * Definition: + * http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
+ * Preconditions: + * @param mu hypothesized mean value. + * @param observed array of observed values + * @throws IllegalArgumentException if input array length is less than 2 + */ + public double t(double mu, double[] observed); +} + diff --git a/src/java/org/apache/commons/math/stat/TestStatisticImpl.java b/src/java/org/apache/commons/math/stat/TestStatisticImpl.java new file mode 100644 index 000000000..a59e6f4fe --- /dev/null +++ b/src/java/org/apache/commons/math/stat/TestStatisticImpl.java @@ -0,0 +1,133 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +package org.apache.commons.math.stat; + + +/** + * Implements the following test statistics + * @author Phil Steitz + * @version $Revision: 1.1 $ $Date: 2003/06/21 23:00:39 $ + * + */ +public class TestStatisticImpl implements TestStatistic { + + /** + * Default constructor. + */ + public TestStatisticImpl() { + } + + /** + * Computes Chi-Square statistic given observed and expected counts
+ * Algorithm: + * http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm
+ * Numerical considerations: none
+ * @param observed array of observed frequency counts + * @param expected array of expected frequency counts + * @throws IllegalArgumentException if input arrays have different lengths + * or length is less than 2 + */ + public double chiSquare(double[] expected, double[] observed) { + double sumSq = 0.0d; + double dev = 0.0d; + if ((expected.length < 2) || (expected.length != observed.length)) { + throw new IllegalArgumentException + ("observed, expected array lengths incorrect"); + } + for (int i = 0; i < observed.length; i++) { + dev = (observed[i] - expected[i]); + sumSq += dev * dev / expected[i]; + } + + return sumSq; + } + + /** + * Computes t statistic given observed values
+ * Algorithm: + * http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
+ * Numerical considerations: none
+ * @param mu hypothesized mean value. + * @param observed array of observed values + * @return t-test statistic for the hypothesized mean and observed values. + * @throws IllegalArgumentException if input array length is less than 2 + */ + public double t(double mu, double[] observed) { + if((observed == null) || (observed.length < 2)) { + throw new IllegalArgumentException + ("observed array length incorrect"); + } + + // leverage Univariate to compute statistics + Univariate univariate = new UnivariateImpl(); + for (int i = 0; i < observed.length; i++) { + univariate.addValue(observed[i]); + } + double n = univariate.getN(); + double xbar = univariate.getMean(); + double std = univariate.getStandardDeviation(); + + return (xbar - mu) / (std / Math.sqrt(n)); + } +} diff --git a/src/test/org/apache/commons/math/RandomDataTest.java b/src/test/org/apache/commons/math/RandomDataTest.java index 161352de8..7ddb074b7 100644 --- a/src/test/org/apache/commons/math/RandomDataTest.java +++ b/src/test/org/apache/commons/math/RandomDataTest.java @@ -62,6 +62,7 @@ import java.security.NoSuchAlgorithmException; import java.util.Collection; import java.util.HashSet; +import org.apache.commons.math.stat.TestStatisticImpl; import org.apache.commons.math.stat.Univariate; import org.apache.commons.math.stat.UnivariateImpl; @@ -69,7 +70,7 @@ import org.apache.commons.math.stat.UnivariateImpl; * Test cases for the RandomData class. 
* * @author Phil Steitz - * @version $Revision: 1.5 $ $Date: 2003/06/04 02:45:49 $ + * @version $Revision: 1.6 $ $Date: 2003/06/21 23:00:39 $ */ public final class RandomDataTest extends TestCase { diff --git a/src/test/org/apache/commons/math/stat/TestStatisticTest.java b/src/test/org/apache/commons/math/stat/TestStatisticTest.java new file mode 100644 index 000000000..2b26ed4af --- /dev/null +++ b/src/test/org/apache/commons/math/stat/TestStatisticTest.java @@ -0,0 +1,145 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ +package org.apache.commons.math.stat; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; +/** + * Test cases for the TestStatistic class. 
+ * + * @author Phil Steitz + * @version $Revision: 1.1 $ $Date: 2003/06/21 23:00:39 $ + */ + +public final class TestStatisticTest extends TestCase { + + private TestStatisticImpl testStatistic = new TestStatisticImpl(); + + public TestStatisticTest(String name) { + super(name); + } + + + public void setUp() { + } + + public static Test suite() { + TestSuite suite = new TestSuite(TestStatisticTest.class); + suite.setName("TestStatistic Tests"); + return suite; + } + + public void testChiSquare() { + double[] observed = {11,24,69,96}; + double[] expected = {8.2,25.2,65.8,100.8}; + assertEquals("chi-square statistic", + 1.39743495,testStatistic.chiSquare(expected,observed),10E-5); + + double[] tooShortObs = {0}; + double[] tooShortEx = {1}; + try { + testStatistic.chiSquare(tooShortObs,tooShortEx); + fail("arguments too short, IllegalArgumentException expected"); + } catch (IllegalArgumentException ex) { + ; + } + + double[] unMatchedObs = {0,1,2,3}; + double[] unMatchedEx = {1,1,2}; + try { + testStatistic.chiSquare(unMatchedEx,unMatchedObs); + fail("arrays have different lengths, IllegalArgumentException expected"); + } catch (IllegalArgumentException ex) { + ; + } + + expected[0] = 0; + assertEquals("chi-square statistic", Double.POSITIVE_INFINITY, + testStatistic.chiSquare(expected,observed),Double.MIN_VALUE); + } + + public void testT(){ + double[] observed = {93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, + 94.0, 101.0, 88.0, 98.0, 94.0, 101.0, 92.0, 95.0}; + double mu = 100.0; + assertEquals("t statistic", -2.82, testStatistic.t(mu, observed), + 10E-3); + + double[] nullObserved = null; + try { + testStatistic.t(mu, nullObserved); + fail("arguments too short, IllegalArgumentException expected"); + } catch (IllegalArgumentException ex) { + ; + } + + double[] emptyObs = {}; + try { + testStatistic.t(mu, emptyObs); + fail("arguments too short, IllegalArgumentException expected"); + } catch (IllegalArgumentException ex) { + ; + } + + double[] tooShortObs = {1.0}; + 
try { + testStatistic.t(mu, tooShortObs); + fail("arguments too short, IllegalArgumentException expected"); + } catch (IllegalArgumentException ex) { + ; + } + } +} +