Initial Certified Tests for Univariate. Note UnivariateImpl currently fails

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140876 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark R. Diggory 2003-06-02 04:25:52 +00:00
parent d23bf18b40
commit 60314410e2
10 changed files with 9339 additions and 0 deletions

View File

@ -0,0 +1,225 @@
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.commons.math.stat;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.URL;
import org.apache.commons.logging.*;
/**
 * Certified-value tests for the {@link Univariate} implementations.
 * <p>
 * Every <code>*.dat</code> file found in the <code>data</code> resource
 * directory is expected to start with a fixed-size header of
 * {@link #HEADER_LINES} lines. The certified sample mean is read from the
 * 41st header line and the certified sample standard deviation from the
 * 42nd; in both cases the value is the text following the last ':' on the
 * line. Every line after the header is one observation.
 *
 * @author Mark R. Diggory
 * @version $Revision: 1.1 $ $Date: 2003/06/02 04:25:52 $
 */
public class CertifiedDataTest extends TestCase {

    /** Number of header lines that precede the observations in a data file. */
    private static final int HEADER_LINES = 60;

    /** Zero-based index of the header line holding the certified mean. */
    private static final int MEAN_LINE_INDEX = 40;

    /** Zero-based index of the header line holding the certified standard deviation. */
    private static final int STD_LINE_INDEX = 41;

    /** Absolute tolerance passed as the delta to <code>assertEquals</code>. */
    protected double tolerance = .01;

    /** Data files discovered by {@link #setUp()}. */
    protected File[] dataFiles = null;

    /** The Univariate implementation currently under test. */
    protected Univariate u = null;

    /** Certified mean of the data file loaded last, or NaN if none was read. */
    protected double mean = Double.NaN;

    /** Certified standard deviation of the data file loaded last, or NaN if none was read. */
    protected double std = Double.NaN;

    protected Log log = LogFactory.getLog(this.getClass());

    /**
     * Certified Data Test Constructor
     * @param name the name of the test case
     */
    public CertifiedDataTest(String name) {
        // setUp() is deliberately not called here: the JUnit runner invokes it
        // before every test method, and calling an overridable method from a
        // constructor would run it against a partially constructed subclass.
        super(name);
    }

    /**
     * Locates the <code>data</code> resource directory relative to this class
     * and collects the <code>.dat</code> files it contains. Fails the test
     * with an explanatory message if the directory cannot be found or listed,
     * or if it holds no data files (the tests would otherwise pass vacuously).
     *
     * @see junit.framework.TestCase#setUp()
     */
    public void setUp() {
        URL url = getClass().getResource("data");
        assertNotNull(
            "Resource directory 'data' not found relative to "
                + getClass().getName(),
            url);

        File dir = new File(url.getFile());
        dataFiles = dir.listFiles(new FilenameFilter() {
            public boolean accept(File parent, String name) {
                return name.endsWith(".dat");
            }
        });

        // listFiles returns null when the path is not a readable directory.
        assertNotNull("Unable to list data directory " + dir, dataFiles);
        assertTrue("No .dat files found in " + dir, dataFiles.length > 0);
    }

    /**
     * @return The test suite
     */
    public static Test suite() {
        TestSuite suite = new TestSuite(CertifiedDataTest.class);
        suite.setName("Certified Tests");
        return suite;
    }

    /**
     * Test UnivariateImpl against the certified values of every data file.
     */
    public void testUnivariateImpl() {
        for (int i = 0; i < dataFiles.length; i++) {
            u = new UnivariateImpl();
            checkCertifiedValues(dataFiles[i]);
        }
    }

    /**
     * Test StoreUnivariateImpl against the certified values of every data file.
     */
    public void testStoredUnivariateImpl() {
        for (int i = 0; i < dataFiles.length; i++) {
            u = new StoreUnivariateImpl();
            checkCertifiedValues(dataFiles[i]);
        }
    }

    /**
     * Loads the given data file into {@link #u} and compares the computed
     * standard deviation and mean with the certified values from the header.
     *
     * @param file the data file to check
     */
    private void checkCertifiedValues(File file) {
        loadStats(file);
        assertEquals(
            file.getName() + ":std",
            std,
            u.getStandardDeviation(),
            tolerance);
        assertEquals(
            file.getName() + ":mean",
            mean,
            u.getMean(),
            tolerance);
    }

    /**
     * Loads a Univariate off of a test file: resets {@link #u}, reads the
     * certified mean and standard deviation from the header into
     * {@link #mean} and {@link #std}, then adds every observation to
     * {@link #u}. Blank lines in the data section are skipped.
     * <p>
     * An I/O error or a truncated header fails the test immediately instead
     * of being logged and ignored, so the root cause is reported rather than
     * a later comparison against NaN. The reader is always closed.
     *
     * @param file the data file to read
     */
    private void loadStats(File file) {
        u.clear();
        mean = Double.NaN;
        std = Double.NaN;

        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(file));

            for (int j = 0; j < HEADER_LINES; j++) {
                String headerLine = in.readLine();
                if (headerLine == null) {
                    fail(file.getName() + ": header ends after " + j
                        + " lines, expected " + HEADER_LINES);
                }
                if (j == MEAN_LINE_INDEX) {
                    mean = parseCertifiedValue(headerLine);
                } else if (j == STD_LINE_INDEX) {
                    std = parseCertifiedValue(headerLine);
                }
            }

            String line = in.readLine();
            while (line != null) {
                String value = line.trim();
                if (value.length() > 0) {
                    u.addValue(Double.parseDouble(value));
                }
                line = in.readLine();
            }
        } catch (IOException ioe) {
            log.error(ioe.getMessage(), ioe);
            fail("Unable to read " + file + ": " + ioe);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    // Nothing useful can be done; record it and keep the
                    // original test outcome.
                    log.error(closeFailure.getMessage(), closeFailure);
                }
            }
        }
    }

    /**
     * Extracts the numeric value that follows the last ':' of a header line.
     *
     * @param line a header line such as <code>Sample Mean ybar: 1.5</code>
     * @return the parsed value
     */
    private double parseCertifiedValue(String line) {
        return Double.parseDouble(
            line.substring(line.lastIndexOf(":") + 1).trim());
    }
}

View File

@ -0,0 +1,260 @@
File Name: Lew.dat
File Format: ASCII
Header : lines 1 to 60 (= 60)
Certified Values: lines 41 to 43 (= 3)
Data : lines 61 to 260 (= 200)
Dataset Name: Lew (Beam Deflection Data)
Description: This is an observed/"real world" data set
consisting of 200 deflections of a steel-concrete
beam while subjected to periodic pressure.
The experimenter was H. S. Lew of the
Center for Building Technology at NIST.
We here use this data to test accuracy
in summary statistics calculations.
Stat Category: Univariate: Summary Statistics
Reference: None
Data: "Real World"
1 Response : y = beam deflection
0 Predictors
200 Observations
Model: Lower Level of Difficulty
2 Parameters : mu, sigma
1 Response Variable : y
0 Predictor Variables
y = mu + e
Certified Values
Sample Mean ybar: -177.435000000000
Sample Standard Deviation (denom. = n-1) s: 277.332168044316
Sample Autocorrelation Coefficient (lag 1) r(1): -0.307304800605679
Number of Observations: 200
Data: Y
----------
-213
-564
-35
-15
141
115
-420
-360
203
-338
-431
194
-220
-513
154
-125
-559
92
-21
-579
-52
99
-543
-175
162
-457
-346
204
-300
-474
164
-107
-572
-8
83
-541
-224
180
-420
-374
201
-236
-531
83
27
-564
-112
131
-507
-254
199
-311
-495
143
-46
-579
-90
136
-472
-338
202
-287
-477
169
-124
-568
17
48
-568
-135
162
-430
-422
172
-74
-577
-13
92
-534
-243
194
-355
-465
156
-81
-578
-64
139
-449
-384
193
-198
-538
110
-44
-577
-6
66
-552
-164
161
-460
-344
205
-281
-504
134
-28
-576
-118
156
-437
-381
200
-220
-540
83
11
-568
-160
172
-414
-408
188
-125
-572
-32
139
-492
-321
205
-262
-504
142
-83
-574
0
48
-571
-106
137
-501
-266
190
-391
-406
194
-186
-553
83
-13
-577
-49
103
-515
-280
201
300
-506
131
-45
-578
-80
138
-462
-361
201
-211
-554
32
74
-533
-235
187
-372
-442
182
-147
-566
25
68
-535
-244
194
-351
-463
174
-125
-570
15
72
-550
-190
172
-424
-385
198
-218
-536
96

View File

@ -0,0 +1,278 @@
File Name: Lottery.dat
File Format: ASCII
Header : lines 1 to 60 (= 60)
Certified Values: lines 41 to 43 (= 3)
Data : lines 61 to 278 (= 218)
Dataset Name: Lottery
Description: This is an observed/"real world" data set
consisting of 218 lottery values
from September 3, 1989 to April 14, 1990 (32 weeks).
One 3-digit random number (from 000 to 999)
is drawn per day, 7 days per week for most
weeks, but fewer days per week for some weeks.
We here use this data to test accuracy
in summary statistics calculations.
Stat Category: Univariate: Summary Statistics
Reference: None
Data: "Real World"
1 Response : y = 3-digit random number
0 Predictors
218 Observations
Model: Lower Level of Difficulty
2 Parameters : mu, sigma
1 Response Variable : y
0 Predictor Variables
y = mu + e
Certified Values
Sample Mean ybar: 518.958715596330
Sample Standard Deviation (denom. = n-1) s: 291.699727470969
Sample Autocorrelation Coefficient (lag 1) r(1): -0.120948622967393
Number of Observations: 218
Data: Y
----------
162
671
933
414
788
730
817
33
536
875
670
236
473
167
877
980
316
950
456
92
517
557
956
954
104
178
794
278
147
773
437
435
502
610
582
780
689
562
964
791
28
97
848
281
858
538
660
972
671
613
867
448
738
966
139
636
847
659
754
243
122
455
195
968
793
59
730
361
574
522
97
762
431
158
429
414
22
629
788
999
187
215
810
782
47
34
108
986
25
644
829
630
315
567
919
331
207
412
242
607
668
944
749
168
864
442
533
805
372
63
458
777
416
340
436
140
919
350
510
572
905
900
85
389
473
758
444
169
625
692
140
897
672
288
312
860
724
226
884
508
976
741
476
417
831
15
318
432
241
114
799
955
833
358
935
146
630
830
440
642
356
373
271
715
367
393
190
669
8
861
108
795
269
590
326
866
64
523
862
840
219
382
998
4
628
305
747
247
34
747
729
645
856
974
24
568
24
694
608
480
410
729
947
293
53
930
223
203
677
227
62
455
387
318
562
242
428
968

View File

@ -0,0 +1,110 @@
File Name: Mavro.dat
File Format: ASCII
Header : lines 1 to 60 (= 60)
Certified Values: lines 41 to 43 (= 3)
Data : lines 61 to 110 (= 50)
Dataset Name: Mavro (Filter Transmittance Data)
Description: This is an observed/"real world" data set
consisting of 50 transmittance measurements
(at a sampling rate of 10 observations per second)
from a filter with a nominal value of 2.
The experimenter was Radu Mavrodineanu,
a member of the chemistry staff at NIST.
We here use this data to test accuracy
in summary statistics calculations.
Stat Category: Univariate: Summary Statistics
Reference: None
Data: "Real World"
1 Response : y = transmittance
0 Predictors
50 Observations
Model: Lower Level of Difficulty
2 Parameters : mu, sigma
1 Response Variable : y
0 Predictor Variables
y = mu + e
Certified Values
Sample Mean ybar: 2.00185600000000
Sample Standard Deviation (denom. = n-1) s: 0.000429123454003053
Sample Autocorrelation Coefficient (lag 1) r(1): 0.937989183438248
Number of Observations: 50
Data: Y
-------------
2.00180
2.00170
2.00180
2.00190
2.00180
2.00170
2.00150
2.00140
2.00150
2.00150
2.00170
2.00180
2.00180
2.00190
2.00190
2.00210
2.00200
2.00160
2.00140
2.00130
2.00130
2.00150
2.00150
2.00160
2.00150
2.00140
2.00130
2.00140
2.00150
2.00140
2.00150
2.00160
2.00150
2.00160
2.00190
2.00200
2.00200
2.00210
2.00220
2.00230
2.00240
2.00250
2.00270
2.00260
2.00260
2.00260
2.00270
2.00260
2.00250
2.00240

View File

@ -0,0 +1,160 @@
File Name: Michelso.dat
File Format: ASCII
Header : lines 1 to 60 (= 60)
Certified Values: lines 41 to 43 (= 3)
Data : lines 61 to 160 (= 100)
Dataset Name: Michelso (Speed of Light Data, in millions of meters per second)
Description: This is an observed/"real world" data set
consisting of 100 measurements of the
speed of light in air. This classic experiment
was carried out by Michelson in 1879.
We here use this data to test accuracy
in summary statistics calculations.
Stat Category: Univariate: Summary Statistics
Reference: Dorsey, Ernest N. (1944). The Velocity of Light.
Transactions of the American Philosophical
Society, Volume 34, Part 1, Pages 1-110, Table 22.
y = mu + e
Certified Values
Sample Mean ybar: 299.852400000000
Sample Standard Deviation (denom. = n-1) s: 0.0790105478190518
Sample Autocorrelation Coefficient (lag 1) r(1): 0.535199668621283
Number of Observations: 100
Data: Y
----------
299.85
299.74
299.90
300.07
299.93
299.85
299.95
299.98
299.98
299.88
300.00
299.98
299.93
299.65
299.76
299.81
300.00
300.00
299.96
299.96
299.96
299.94
299.96
299.94
299.88
299.80
299.85
299.88
299.90
299.84
299.83
299.79
299.81
299.88
299.88
299.83
299.80
299.79
299.76
299.80
299.88
299.88
299.88
299.86
299.72
299.72
299.62
299.86
299.97
299.95
299.88
299.91
299.85
299.87
299.84
299.84
299.85
299.84
299.84
299.84
299.89
299.81
299.81
299.82
299.80
299.77
299.76
299.74
299.75
299.76
299.91
299.92
299.89
299.86
299.88
299.72
299.84
299.85
299.85
299.78
299.89
299.84
299.78
299.81
299.76
299.81
299.79
299.81
299.82
299.85
299.87
299.87
299.81
299.74
299.81
299.94
299.95
299.80
299.81
299.87

View File

@ -0,0 +1,63 @@
File Name: NumAcc1.dat
File Format: ASCII
Header : lines 1 to 60 (= 60)
Certified Values: lines 41 to 43 (= 3)
Data : lines 61 to 63 (= 3)
Dataset Name: NumAcc1
Description: This is a constructed/fabricated data set
to test accuracy in summary statistic calculations.
The numbers are large (8-digit integers) and
differ only in the last decimal place.
Note--by construction, this data set has
sample mean = 10000002 (exact)
sample standard deviation = 1 (exact)
sample autocorrelation coef. = -0.5 (exact)
Stat Category: Univariate: Summary Statistics
Reference: Simon, Stephen D. and Lesage, James P. (1989).
"Assessing the Accuracy of ANOVA Calculations
in Statistical Software", Computational
Statistics & Data Analysis, 8, pp. 325-332.
Data: Constructed
1 Response : y
0 Predictors
3 Observations
Model: Lower Level of Difficulty
2 Parameters : mu, sigma
1 Response Variable : y
0 Predictor Variables
y = mu + e
Certified Values
Sample Mean ybar: 10000002
Sample Standard Deviation (denom. = n-1) s: 1
Sample Autocorrelation Coefficient (lag 1) r(1): -0.5
Number of Observations: 3
Data: Y
---------
10000001
10000003
10000002

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff