Initial Certified Tests for Univariate. Note UnivariateImpl currently fails

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140876 13f79535-47bb-0310-9956-ffa450edef68
2003-06-02 04:25:52 +00:00 · 2003-06-02 04:25:52 +00:00 · 60314410e2
parent d23bf18b40
commit 60314410e2
10 changed files with 9339 additions and 0 deletions
--- a/src/test/org/apache/commons/math/stat/CertifiedDataTest.java
+++ b/src/test/org/apache/commons/math/stat/CertifiedDataTest.java
@ -0,0 +1,225 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2003 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ *    any, must include the following acknowlegement:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowlegement may appear in the software itself,
+ *    if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "The Jakarta Project", "Commons", and "Apache Software
+ *    Foundation" must not be used to endorse or promote products derived
+ *    from this software without prior written permission. For written
+ *    permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ *    nor may "Apache" appear in their names without prior written
+ *    permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.math.stat;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.net.URL;
+import org.apache.commons.logging.*;
+/**
+ * Certified-value tests for the {@link Univariate} implementations
+ * {@link UnivariateImpl} and {@link StoreUnivariateImpl}.
+ *
+ * <p>Every <code>*.dat</code> file found in the <code>data</code> resource
+ * directory is expected to start with a 60-line header whose lines 41 and 42
+ * end with the certified sample mean and sample standard deviation, followed
+ * by one observation per line.  Each implementation is fed the observations
+ * and its results are compared with the certified values.</p>
+ *
+ * @author Mark R. Diggory
+ * @version $Revision: 1.1 $ $Date: 2003/06/02 04:25:52 $
+ */
+public class CertifiedDataTest extends TestCase {
+
+	/** Number of header lines that precede the observations in a data file. */
+	private static final int HEADER_LINES = 60;
+
+	/** Zero-based index of the header line that holds the certified mean. */
+	private static final int MEAN_LINE = 40;
+
+	/** Zero-based index of the header line that holds the certified standard deviation. */
+	private static final int STD_LINE = 41;
+
+	/**
+	 * Absolute tolerance used when comparing computed and certified values.
+	 * NOTE(review): an absolute tolerance of .01 is larger than some of the
+	 * certified standard deviations, so a relative tolerance may be more
+	 * meaningful -- confirm before tightening.
+	 */
+	protected double tolerance = .01;
+
+	/** Certified data files discovered by {@link #setUp()}. */
+	protected File[] dataFiles = null;
+
+	/** Implementation currently under test. */
+	protected Univariate u = null;
+
+	/** Certified mean read from the most recently loaded data file. */
+	protected double mean = Double.NaN;
+
+	/** Certified standard deviation read from the most recently loaded data file. */
+	protected double std = Double.NaN;
+
+	/** Logger used to report I/O problems while reading data files. */
+	protected Log log = LogFactory.getLog(this.getClass());
+
+	/**
+	 * Certified Data Test Constructor
+	 * @param name name of the test method to run
+	 */
+	public CertifiedDataTest(String name) {
+		super(name);
+		// setUp() is intentionally not called here: JUnit invokes it before
+		// every test method, and calling an overridable method from a
+		// constructor is unsafe.
+	}
+
+	/**
+	 * Locates the <code>data</code> resource directory and collects every
+	 * <code>*.dat</code> file in it.  Fails the test with a descriptive
+	 * message when the directory or its contents cannot be found, rather
+	 * than letting a later NullPointerException hide the cause.
+	 *
+	 * @see junit.framework.TestCase#setUp()
+	 */
+	public void setUp() {
+		URL url = getClass().getResource("data");
+		if (url == null) {
+			fail("Resource directory 'data' not found for " + getClass().getName());
+		}
+
+		File dir = new File(url.getFile());
+		dataFiles = dir.listFiles(new FilenameFilter() {
+			public boolean accept(File parent, String name) {
+				return name.endsWith(".dat");
+			}
+		});
+
+		// listFiles() returns null when the path is not a readable directory.
+		if (dataFiles == null) {
+			fail("Unable to list data files in " + dir);
+		}
+		// An empty directory would let every test pass without checking anything.
+		if (dataFiles.length == 0) {
+			fail("No .dat files found in " + dir);
+		}
+	}
+
+	/**
+	 * @return The test suite
+	 */
+	public static Test suite() {
+		TestSuite suite = new TestSuite(CertifiedDataTest.class);
+		suite.setName("Certified Tests");
+		return suite;
+	}
+
+	/**
+	 * Test UnivariateImpl against every certified data file.
+	 */
+	public void testUnivariateImpl() {
+		for (int i = 0; i < dataFiles.length; i++) {
+			u = new UnivariateImpl();
+			assertMatchesCertifiedValues(dataFiles[i]);
+		}
+	}
+
+	/**
+	 * Test StoreUnivariateImpl against every certified data file.
+	 */
+	public void testStoredUnivariateImpl() {
+		for (int i = 0; i < dataFiles.length; i++) {
+			u = new StoreUnivariateImpl();
+			assertMatchesCertifiedValues(dataFiles[i]);
+		}
+	}
+
+	/**
+	 * Loads <code>file</code> into {@link #u} and asserts that the computed
+	 * standard deviation and mean agree with the certified values within
+	 * {@link #tolerance}.
+	 * @param file certified data file to check
+	 */
+	private void assertMatchesCertifiedValues(File file) {
+		loadStats(file);
+
+		assertEquals(
+			file.getName() + ":std",
+			std,
+			u.getStandardDeviation(),
+			tolerance);
+
+		assertEquals(
+			file.getName() + ":mean",
+			mean,
+			u.getMean(),
+			tolerance);
+	}
+
+	/**
+	 * Loads a Univariate off of a test file.  The certified mean and
+	 * standard deviation are taken from the header and stored in
+	 * {@link #mean} and {@link #std}; every non-blank line after the header
+	 * is added to {@link #u} as an observation.  The test fails if the file
+	 * cannot be read or its header is shorter than expected.
+	 * @param file certified data file to read
+	 */
+	private void loadStats(File file) {
+
+		u.clear();
+		mean = Double.NaN;
+		std = Double.NaN;
+
+		BufferedReader in = null;
+		try {
+			in = new BufferedReader(new FileReader(file));
+
+			for (int j = 0; j < HEADER_LINES; j++) {
+				String header = in.readLine();
+				if (header == null) {
+					fail(
+						file.getName()
+							+ ": header ends after "
+							+ j
+							+ " lines, expected "
+							+ HEADER_LINES);
+				}
+				if (j == MEAN_LINE) {
+					mean = parseCertifiedValue(header);
+				} else if (j == STD_LINE) {
+					std = parseCertifiedValue(header);
+				}
+			}
+
+			for (String line = in.readLine();
+				line != null;
+				line = in.readLine()) {
+				String value = line.trim();
+				// Tolerate blank lines (e.g. a trailing newline) in the data section.
+				if (value.length() > 0) {
+					u.addValue(Double.parseDouble(value));
+				}
+			}
+		} catch (IOException ioe) {
+			// Covers FileNotFoundException too.  Fail instead of continuing
+			// with NaN certified values, which would hide the real problem.
+			log.error(ioe.getMessage(), ioe);
+			fail("Unable to read " + file + ": " + ioe);
+		} finally {
+			if (in != null) {
+				try {
+					in.close();
+				} catch (IOException ignored) {
+					// The data has already been consumed (or the test has
+					// already failed); a close failure changes nothing.
+					log.warn(ignored.getMessage(), ignored);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Parses the number that follows the last ':' on a header line.
+	 * @param line header line ending with a certified value
+	 * @return the certified value
+	 */
+	private static double parseCertifiedValue(String line) {
+		return Double.parseDouble(
+			line.substring(line.lastIndexOf(":") + 1).trim());
+	}
+}
--- a/src/test/org/apache/commons/math/stat/data/Lew.dat
+++ b/src/test/org/apache/commons/math/stat/data/Lew.dat
@ -0,0 +1,260 @@
+File Name:     Lew.dat
+
+File Format:   ASCII
+               Header          : lines  1 to  60     (=  60)
+               Certified Values: lines 41 to  43     (=   3)
+               Data            : lines 61 to 260     (= 200)
+
+Dataset Name:  Lew (Beam Deflection Data)
+
+Description:   This is an observed/"real world" data set
+               consisting of 200 deflections of a steel-concrete
+               beam while subjected to periodic pressure.
+               The experimenter was H. S. Lew of the
+               Center for Building Technology at NIST.
+               We here use this data to test accuracy
+               in summary statistics calculations.
+
+Stat Category: Univariate: Summary Statistics
+
+Reference:     None
+
+Data:          "Real World"
+               1    Response          : y = beam deflection
+               0    Predictors
+               200  Observations
+
+Model:         Lower Level of Difficulty
+               2    Parameters        : mu, sigma
+               1    Response Variable : y
+               0    Predictor Variables
+
+               y    = mu + e
+
+
+
+
+
+
+
+                                                  Certified Values
+Sample Mean                                ybar:  -177.435000000000
+Sample Standard Deviation (denom. = n-1)      s:   277.332168044316
+Sample Autocorrelation Coefficient (lag 1) r(1):  -0.307304800605679
+
+Number of Observations:                             200
+
+
+
+
+
+
+
+
+
+
+
+
+
+Data: Y
+----------
+    -213
+    -564
+     -35
+     -15
+     141
+     115
+    -420
+    -360
+     203
+    -338
+    -431
+     194
+    -220
+    -513
+     154
+    -125
+    -559
+      92
+     -21
+    -579
+     -52
+      99
+    -543
+    -175
+     162
+    -457
+    -346
+     204
+    -300
+    -474
+     164
+    -107
+    -572
+      -8
+      83
+    -541
+    -224
+     180
+    -420
+    -374
+     201
+    -236
+    -531
+      83
+      27
+    -564
+    -112
+     131
+    -507
+    -254
+     199
+    -311
+    -495
+     143
+     -46
+    -579
+     -90
+     136
+    -472
+    -338
+     202
+    -287
+    -477
+     169
+    -124
+    -568
+      17
+      48
+    -568
+    -135
+     162
+    -430
+    -422
+     172
+     -74
+    -577
+     -13
+      92
+    -534
+    -243
+     194
+    -355
+    -465
+     156
+     -81
+    -578
+     -64
+     139
+    -449
+    -384
+     193
+    -198
+    -538
+     110
+     -44
+    -577
+      -6
+      66
+    -552
+    -164
+     161
+    -460
+    -344
+     205
+    -281
+    -504
+     134
+     -28
+    -576
+    -118
+     156
+    -437
+    -381
+     200
+    -220
+    -540
+      83
+      11
+    -568
+    -160
+     172
+    -414
+    -408
+     188
+    -125
+    -572
+     -32
+     139
+    -492
+    -321
+     205
+    -262
+    -504
+     142
+     -83
+    -574
+       0
+      48
+    -571
+    -106
+     137
+    -501
+    -266
+     190
+    -391
+    -406
+     194
+    -186
+    -553
+      83
+     -13
+    -577
+     -49
+     103
+    -515
+    -280
+     201
+     300
+    -506
+     131
+     -45
+    -578
+     -80
+     138
+    -462
+    -361
+     201
+    -211
+    -554
+      32
+      74
+    -533
+    -235
+     187
+    -372
+    -442
+     182
+    -147
+    -566
+      25
+      68
+    -535
+    -244
+     194
+    -351
+    -463
+     174
+    -125
+    -570
+      15
+      72
+    -550
+    -190
+     172
+    -424
+    -385
+     198
+    -218
+    -536
+      96
--- a/src/test/org/apache/commons/math/stat/data/Lottery.dat
+++ b/src/test/org/apache/commons/math/stat/data/Lottery.dat
@ -0,0 +1,278 @@
+File Name:     Lottery.dat
+
+File Format:   ASCII
+               Header          : lines  1 to  60     (=  60)
+               Certified Values: lines 41 to  43     (=   3)
+               Data            : lines 61 to 278     (= 218)
+
+Dataset Name:  Lottery
+
+Description:   This is an observed/"real world" data set
+               consisting of 218 lottery values
+               from September 3, 1989 to April 14, 1990 (32 weeks).
+               One 3-digit random number (from 000 to 999)
+               is drawn per day, 7 days per week for most
+               weeks, but fewer days per week for some weeks.
+               We here use this data to test accuracy
+               in summary statistics calculations.
+
+Stat Category: Univariate: Summary Statistics
+
+Reference:     None
+
+Data:          "Real World"
+               1    Response          : y = 3-digit random number
+               0    Predictors
+               218  Observations
+
+Model:         Lower Level of Difficulty
+               2    Parameters        : mu, sigma
+               1    Response Variable : y
+               0    Predictor Variables
+
+               y    = mu + e
+
+
+
+
+
+
+                                                  Certified Values
+Sample Mean                                ybar:  518.958715596330
+Sample Standard Deviation (denom. = n-1)      s:  291.699727470969
+Sample Autocorrelation Coefficient (lag 1) r(1):  -0.120948622967393
+
+Number of Observations:                             218
+
+
+
+
+
+
+
+
+
+
+
+
+
+Data: Y
+----------
+     162
+     671
+     933
+     414
+     788
+     730
+     817
+      33
+     536
+     875
+     670
+     236
+     473
+     167
+     877
+     980
+     316
+     950
+     456
+      92
+     517
+     557
+     956
+     954
+     104
+     178
+     794
+     278
+     147
+     773
+     437
+     435
+     502
+     610
+     582
+     780
+     689
+     562
+     964
+     791
+      28
+      97
+     848
+     281
+     858
+     538
+     660
+     972
+     671
+     613
+     867
+     448
+     738
+     966
+     139
+     636
+     847
+     659
+     754
+     243
+     122
+     455
+     195
+     968
+     793
+      59
+     730
+     361
+     574
+     522
+      97
+     762
+     431
+     158
+     429
+     414
+      22
+     629
+     788
+     999
+     187
+     215
+     810
+     782
+      47
+      34
+     108
+     986
+      25
+     644
+     829
+     630
+     315
+     567
+     919
+     331
+     207
+     412
+     242
+     607
+     668
+     944
+     749
+     168
+     864
+     442
+     533
+     805
+     372
+      63
+     458
+     777
+     416
+     340
+     436
+     140
+     919
+     350
+     510
+     572
+     905
+     900
+      85
+     389
+     473
+     758
+     444
+     169
+     625
+     692
+     140
+     897
+     672
+     288
+     312
+     860
+     724
+     226
+     884
+     508
+     976
+     741
+     476
+     417
+     831
+      15
+     318
+     432
+     241
+     114
+     799
+     955
+     833
+     358
+     935
+     146
+     630
+     830
+     440
+     642
+     356
+     373
+     271
+     715
+     367
+     393
+     190
+     669
+       8
+     861
+     108
+     795
+     269
+     590
+     326
+     866
+      64
+     523
+     862
+     840
+     219
+     382
+     998
+       4
+     628
+     305
+     747
+     247
+      34
+     747
+     729
+     645
+     856
+     974
+      24
+     568
+      24
+     694
+     608
+     480
+     410
+     729
+     947
+     293
+      53
+     930
+     223
+     203
+     677
+     227
+      62
+     455
+     387
+     318
+     562
+     242
+     428
+     968
--- a/src/test/org/apache/commons/math/stat/data/Mavro.dat
+++ b/src/test/org/apache/commons/math/stat/data/Mavro.dat
@ -0,0 +1,110 @@
+File Name:     Mavro.dat
+
+File Format:   ASCII
+               Header          : lines  1 to  60     (= 60)
+               Certified Values: lines 41 to  43     (=  3)
+               Data            : lines 61 to 110     (= 50)
+
+Dataset Name:  Mavro (Filter Transmittance Data)
+
+Description:   This is an observed/"real world" data set
+               consisting of 50 transmittance measurements
+               (at a sampling rate of 10 observations per second)
+               from a filter with a nominal value of 2.
+               The experimenter was Radu Mavrodineanu,
+               a member of the chemistry staff at NIST.
+               We here use this data to test accuracy
+               in summary statistics calculations.
+
+Stat Category: Univariate: Summary Statistics
+
+Reference:     None
+
+Data:          "Real World"
+               1    Response          : y = transmittance
+               0    Predictors
+               50   Observations
+
+Model:         Lower Level of Difficulty
+               2    Parameters        : mu, sigma
+               1    Response Variable : y
+               0    Predictor Variables
+
+               y    = mu + e
+
+
+
+
+
+
+                                                  Certified Values
+Sample Mean                                ybar:  2.00185600000000
+Sample Standard Deviation (denom. = n-1)      s:  0.000429123454003053
+Sample Autocorrelation Coefficient (lag 1) r(1):  0.937989183438248
+
+Number of Observations:                              50
+
+
+
+
+
+
+
+
+
+
+
+
+
+Data: Y
+-------------
+   2.00180
+   2.00170
+   2.00180
+   2.00190
+   2.00180
+   2.00170
+   2.00150
+   2.00140
+   2.00150
+   2.00150
+   2.00170
+   2.00180
+   2.00180
+   2.00190
+   2.00190
+   2.00210
+   2.00200
+   2.00160
+   2.00140
+   2.00130
+   2.00130
+   2.00150
+   2.00150
+   2.00160
+   2.00150
+   2.00140
+   2.00130
+   2.00140
+   2.00150
+   2.00140
+   2.00150
+   2.00160
+   2.00150
+   2.00160
+   2.00190
+   2.00200
+   2.00200
+   2.00210
+   2.00220
+   2.00230
+   2.00240
+   2.00250
+   2.00270
+   2.00260
+   2.00260
+   2.00260
+   2.00270
+   2.00260
+   2.00250
+   2.00240
--- a/src/test/org/apache/commons/math/stat/data/Michelso.dat
+++ b/src/test/org/apache/commons/math/stat/data/Michelso.dat
@ -0,0 +1,160 @@
+File Name:     Michelso.dat
+
+File Format:   ASCII
+               Header          : lines  1 to  60     (=  60)
+               Certified Values: lines 41 to  43     (=   3)
+               Data            : lines 61 to 160     (= 100)
+
+Dataset Name:  Michelso (Speed of Light Data, in millions of meters per second)
+
+Description:   This is an observed/"real world" data set
+               consisting of 100 measurements of the
+               speed of light in air.  This classic experiment
+               was carried out by Michelson in 1879.
+               We here use this data to test accuracy
+               in summary statistics calculations.
+
+Stat Category: Univariate: Summary Statistics
+
+Reference:     Dorsey, Ernest N. (1944). The Velocity of Light.
+               Transactions of the American Philosophical
+               Society, Volume 34, Part 1, Pages 1-110, Table 22.
+
+               y    = mu + e
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                  Certified Values
+Sample Mean                                ybar:  299.852400000000
+Sample Standard Deviation (denom. = n-1)      s:  0.0790105478190518
+Sample Autocorrelation Coefficient (lag 1) r(1):  0.535199668621283
+
+Number of Observations:                             100
+
+
+
+
+
+
+
+
+
+
+
+
+
+Data: Y
+----------
+  299.85
+  299.74
+  299.90
+  300.07
+  299.93
+  299.85
+  299.95
+  299.98
+  299.98
+  299.88
+  300.00
+  299.98
+  299.93
+  299.65
+  299.76
+  299.81
+  300.00
+  300.00
+  299.96
+  299.96
+  299.96
+  299.94
+  299.96
+  299.94
+  299.88
+  299.80
+  299.85
+  299.88
+  299.90
+  299.84
+  299.83
+  299.79
+  299.81
+  299.88
+  299.88
+  299.83
+  299.80
+  299.79
+  299.76
+  299.80
+  299.88
+  299.88
+  299.88
+  299.86
+  299.72
+  299.72
+  299.62
+  299.86
+  299.97
+  299.95
+  299.88
+  299.91
+  299.85
+  299.87
+  299.84
+  299.84
+  299.85
+  299.84
+  299.84
+  299.84
+  299.89
+  299.81
+  299.81
+  299.82
+  299.80
+  299.77
+  299.76
+  299.74
+  299.75
+  299.76
+  299.91
+  299.92
+  299.89
+  299.86
+  299.88
+  299.72
+  299.84
+  299.85
+  299.85
+  299.78
+  299.89
+  299.84
+  299.78
+  299.81
+  299.76
+  299.81
+  299.79
+  299.81
+  299.82
+  299.85
+  299.87
+  299.87
+  299.81
+  299.74
+  299.81
+  299.94
+  299.95
+  299.80
+  299.81
+  299.87
--- a/src/test/org/apache/commons/math/stat/data/NumAcc1.dat
+++ b/src/test/org/apache/commons/math/stat/data/NumAcc1.dat
@ -0,0 +1,63 @@
+File Name:     NumAcc1.dat
+
+File Format:   ASCII
+               Header          : lines  1 to 60       (= 60)
+               Certified Values: lines 41 to 43       (=  3)
+               Data            : lines 61 to 63       (=  3)
+
+Dataset Name:  NumAcc1
+
+Description:   This is a constructed/fabricated data set
+               to test accuracy in summary statistic calculations.
+               The numbers are large (8-digit integers) and
+               differ only in the last decimal place.
+               Note--by construction, this data set has
+                     sample mean                  = 10000002 (exact)
+                     sample standard deviation    =        1 (exact)
+                     sample autocorrelation coef. =     -0.5 (exact)
+
+Stat Category: Univariate: Summary Statistics
+
+Reference:     Simon, Stephen D. and Lesage, James P. (1989).
+               "Assessing the Accuracy of ANOVA Calculations
+               in Statistical Software", Computational
+               Statistics & Data Analysis, 8, pp. 325-332.
+
+Data:          Constructed
+               1    Response           : y
+               0    Predictors
+               3    Observations
+
+Model:         Lower Level of Difficulty
+               2    Parameters         : mu, sigma
+               1    Response Variable  : y
+               0    Predictor Variables
+
+               y    = mu + e
+
+
+
+                                                  Certified Values
+Sample Mean                                ybar:  10000002 
+Sample Standard Deviation (denom. = n-1)      s:         1 
+Sample Autocorrelation Coefficient (lag 1) r(1):      -0.5 
+
+Number of Observations:                                       3
+
+
+
+
+
+
+
+
+
+
+
+
+
+Data: Y
+---------
+10000001
+10000003
+10000002
--- a/src/test/org/apache/commons/math/stat/data/NumAcc2.dat
+++ b/src/test/org/apache/commons/math/stat/data/NumAcc2.dat
--- a/src/test/org/apache/commons/math/stat/data/NumAcc3.dat
+++ b/src/test/org/apache/commons/math/stat/data/NumAcc3.dat
--- a/src/test/org/apache/commons/math/stat/data/NumAcc4.dat
+++ b/src/test/org/apache/commons/math/stat/data/NumAcc4.dat
--- a/src/test/org/apache/commons/math/stat/data/PiDigits.dat
+++ b/src/test/org/apache/commons/math/stat/data/PiDigits.dat