Added correlation package, Covariance class. JIRA: MATH-114
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@743688 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
01f82aed42
commit
42b68a69ce
|
@ -0,0 +1,205 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.math.stat.correlation;
|
||||||
|
|
||||||
|
import org.apache.commons.math.MathRuntimeException;
|
||||||
|
import org.apache.commons.math.linear.RealMatrix;
|
||||||
|
import org.apache.commons.math.linear.DenseRealMatrix;
|
||||||
|
import org.apache.commons.math.stat.descriptive.moment.Mean;
|
||||||
|
import org.apache.commons.math.stat.descriptive.moment.Variance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes covariances for pairs of arrays or columns of a matrix.
|
||||||
|
*
|
||||||
|
* <p>The constructors that take <code>RealMatrix</code> or
|
||||||
|
* <code>double[][]</code> arguments generate correlation matrices. The
|
||||||
|
* columns of the input matrices are assumed to represent variable values.</p>
|
||||||
|
*
|
||||||
|
* <p>The constructor argument <code>biasCorrected</code> determines whether or
|
||||||
|
* not computed covariances are bias-corrected.</p>
|
||||||
|
*
|
||||||
|
* <p>Unbiased covariances are given by the formula</p>
|
||||||
|
* <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
|
||||||
|
* where <code>E(x)</code> is the mean of <code>X</code> and <code>E(Y)</code>
|
||||||
|
* is the mean of the <code>Y</code> values.
|
||||||
|
*
|
||||||
|
* <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
|
||||||
|
*
|
||||||
|
* @version $Revision$ $Date$
|
||||||
|
* @since 2.0
|
||||||
|
*/
|
||||||
|
public class Covariance {
|
||||||
|
|
||||||
|
/** covariance matrix */
|
||||||
|
private final RealMatrix covarianceMatrix;
|
||||||
|
|
||||||
|
public Covariance() {
|
||||||
|
super();
|
||||||
|
covarianceMatrix = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a Covariance matrix from a rectangular array
|
||||||
|
* whose columns represent covariates.
|
||||||
|
*
|
||||||
|
* <p>The <code>biasCorrected</code> parameter determines whether or not
|
||||||
|
* covariance estimates are bias-corrected.</p>
|
||||||
|
*
|
||||||
|
* <p>The input array must be rectangular with at least two columns
|
||||||
|
* and two rows.</p>
|
||||||
|
*
|
||||||
|
* @param data rectangular array with columns representing covariates
|
||||||
|
* @param biasCorrected true means covariances are bias-corrected
|
||||||
|
* @throws IllegalArgumentException if the input data array is not
|
||||||
|
* rectangular with at least two rows and two columns.
|
||||||
|
*/
|
||||||
|
public Covariance(double[][] data, boolean biasCorrected) {
|
||||||
|
this(new DenseRealMatrix(data), biasCorrected);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a Covariance matrix from a rectangular array
|
||||||
|
* whose columns represent covariates.
|
||||||
|
*
|
||||||
|
* <p>The input array must be rectangular with at least two columns
|
||||||
|
* and two rows</p>
|
||||||
|
*
|
||||||
|
* @param data rectangular array with columns representing covariates
|
||||||
|
* @throws IllegalArgumentException if the input data array is not
|
||||||
|
* rectangular with at least two rows and two columns.
|
||||||
|
*/
|
||||||
|
public Covariance(double[][] data) {
|
||||||
|
this(data, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a covariance matrix from a matrix whose columns
|
||||||
|
* represent covariates.
|
||||||
|
*
|
||||||
|
* <p>The <code>biasCorrected</code> parameter determines whether or not
|
||||||
|
* covariance estimates are bias-corrected.</p>
|
||||||
|
*
|
||||||
|
* <p>The matrix must have at least two columns and two rows</p>
|
||||||
|
*
|
||||||
|
* @param matrix matrix with columns representing covariates
|
||||||
|
* @param biasCorrected true means covariances are bias-corrected
|
||||||
|
* @throws IllegalArgumentException if the input matrix does not have
|
||||||
|
* at least two rows and two columns
|
||||||
|
*/
|
||||||
|
public Covariance(RealMatrix matrix, boolean biasCorrected) {
|
||||||
|
checkSufficientData(matrix);
|
||||||
|
covarianceMatrix = computeCovariance(matrix, biasCorrected);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a covariance matrix from a matrix whose columns
|
||||||
|
* represent covariates.
|
||||||
|
*
|
||||||
|
* <p>The matrix must have at least two columns and two rows</p>
|
||||||
|
*
|
||||||
|
* @param matrix matrix with columns representing covariates
|
||||||
|
* @throws IllegalArgumentException if the input matrix does not have
|
||||||
|
* at least two rows and two columns
|
||||||
|
*/
|
||||||
|
public Covariance(RealMatrix matrix) {
|
||||||
|
this(matrix, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the covariance matrix
|
||||||
|
*
|
||||||
|
* @return covariance matrix
|
||||||
|
*/
|
||||||
|
public RealMatrix getCovarianceMatrix() {
|
||||||
|
return covarianceMatrix;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a covariance matrix from a matrix whose columns represent
|
||||||
|
* covariates.
|
||||||
|
*
|
||||||
|
* <p>The input matrix must have at least two columns and two rows</p>
|
||||||
|
*
|
||||||
|
* <p>The <code>biasCorrected</code> parameter determines whether or not
|
||||||
|
* covariance estimates are bias-corrected.</p>
|
||||||
|
*
|
||||||
|
* @return covariance matrix
|
||||||
|
*/
|
||||||
|
protected RealMatrix computeCovariance(RealMatrix matrix, boolean biasCorrected) {
|
||||||
|
int dimension = matrix.getColumnDimension();
|
||||||
|
Variance variance = new Variance(biasCorrected);
|
||||||
|
RealMatrix outMatrix = new DenseRealMatrix(dimension, dimension);
|
||||||
|
for (int i = 0; i < dimension; i++) {
|
||||||
|
for (int j = 0; j < i; j++) {
|
||||||
|
double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
|
||||||
|
outMatrix.setEntry(i, j, cov);
|
||||||
|
outMatrix.setEntry(j, i, cov);
|
||||||
|
}
|
||||||
|
outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
|
||||||
|
}
|
||||||
|
return outMatrix;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the covariance between the two arrays.
|
||||||
|
*
|
||||||
|
* <p>Array lengths must match and the common length must be at least 2.</p>
|
||||||
|
*
|
||||||
|
* @param xArray first data array
|
||||||
|
* @param yArray second data array
|
||||||
|
* @param biasCorrected if true, returned value will be bias-corrected
|
||||||
|
* @return returns the covariance for the two arrays
|
||||||
|
* @throws IllegalArgumentException if the arrays lengths do not match or
|
||||||
|
* there is insufficient data
|
||||||
|
*/
|
||||||
|
public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
Mean mean = new Mean();
|
||||||
|
double result = 0d;
|
||||||
|
long length = xArray.length;
|
||||||
|
if(length == yArray.length && length > 1) {
|
||||||
|
double xMean = mean.evaluate(xArray);
|
||||||
|
double yMean = mean.evaluate(yArray);
|
||||||
|
for (int i = 0; i < xArray.length; i++) {
|
||||||
|
double xDev = xArray[i] - xMean;
|
||||||
|
double yDev = yArray[i] - yMean;
|
||||||
|
result += (xDev * yDev - result) / (i + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw MathRuntimeException.createIllegalArgumentException(
|
||||||
|
"Arrays must have the same length and both must have at " +
|
||||||
|
"least two elements. xArray has size {0}, yArray has {1} elements",
|
||||||
|
new Object[] {xArray.length, yArray.length});
|
||||||
|
}
|
||||||
|
return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throws IllegalArgumentException of the matrix does not have at least
|
||||||
|
* two columns and two rows
|
||||||
|
*/
|
||||||
|
private void checkSufficientData(final RealMatrix matrix) {
|
||||||
|
int nRows = matrix.getRowDimension();
|
||||||
|
int nCols = matrix.getColumnDimension();
|
||||||
|
if (nRows < 2 || nCols < 2) {
|
||||||
|
throw MathRuntimeException.createIllegalArgumentException(
|
||||||
|
"Insufficient data: only {0} rows and {1} columns.",
|
||||||
|
new Object[]{nRows, nCols});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -39,9 +39,16 @@ The <action> type attribute can be add,update,fix,remove.
|
||||||
</properties>
|
</properties>
|
||||||
<body>
|
<body>
|
||||||
<release version="2.0" date="TBD" description="TBD">
|
<release version="2.0" date="TBD" description="TBD">
|
||||||
|
<action dev="psteitz" type="add" issue="MATH-114">
|
||||||
|
Added Covariance class to compute variance-covariance matrices in new
|
||||||
|
correlation package.
|
||||||
|
</action>
|
||||||
<action dev="luc" type="fix" issue="MATH-216" due-to="Cyril Briquet">
|
<action dev="luc" type="fix" issue="MATH-216" due-to="Cyril Briquet">
|
||||||
Improved fast Fourier transform efficiency.
|
Improved fast Fourier transform efficiency.
|
||||||
</action>
|
</action>
|
||||||
|
<action dev="billbarker" type="add">
|
||||||
|
Added a SparseRealVector class that implements a sparse vector for the RealVector interface.
|
||||||
|
</action>
|
||||||
<action dev="luc" type="add" >
|
<action dev="luc" type="add" >
|
||||||
Added factory methods to create Chebyshev, Hermite, Laguerre and Legendre polynomials.
|
Added factory methods to create Chebyshev, Hermite, Laguerre and Legendre polynomials.
|
||||||
</action>
|
</action>
|
||||||
|
@ -303,9 +310,6 @@ The <action> type attribute can be add,update,fix,remove.
|
||||||
<action dev="brentworden" type="fix" issue="MATH-204" due-to="Mick">
|
<action dev="brentworden" type="fix" issue="MATH-204" due-to="Mick">
|
||||||
Added root checks for the endpoints.
|
Added root checks for the endpoints.
|
||||||
</action>
|
</action>
|
||||||
<action dev="billbarker" type="add">
|
|
||||||
Added a SparseRealVector class that implements a sparse vector for the RealVector interface.
|
|
||||||
</action>
|
|
||||||
</release>
|
</release>
|
||||||
<release version="1.2" date="2008-02-24"
|
<release version="1.2" date="2008-02-24"
|
||||||
description="This release combines bug fixes and new features. Most notable
|
description="This release combines bug fixes and new features. Most notable
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
# R source file to validate covariance tests in
|
||||||
|
# org.apache.commons.math.stat.correlation.CovarianceTest
|
||||||
|
#
|
||||||
|
# To run the test, install R, put this file and testFunctions
|
||||||
|
# into the same directory, launch R from this directory and then enter
|
||||||
|
# source("<name-of-this-file>")
|
||||||
|
#
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
tol <- 1E-9 # error tolerance for tests
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
# Function definitions
|
||||||
|
|
||||||
|
source("testFunctions") # utility test functions
|
||||||
|
options(digits=16) # override number of digits displayed
|
||||||
|
|
||||||
|
# function to verify covariance computations
|
||||||
|
verifyCovariance <- function(matrix, expectedCovariance, name) {
|
||||||
|
covariance <- cov(matrix)
|
||||||
|
output <- c("Covariance test dataset = ", name)
|
||||||
|
if (assertEquals(expectedCovariance,covariance,tol,"Covariances")) {
|
||||||
|
displayPadded(output, SUCCEEDED, WIDTH)
|
||||||
|
} else {
|
||||||
|
displayPadded(output, FAILED, WIDTH)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#--------------------------------------------------------------------------
|
||||||
|
cat("Covariance test cases\n")
|
||||||
|
|
||||||
|
# Longley
|
||||||
|
|
||||||
|
longley <- matrix(c(60323,83.0,234289,2356,1590,107608,1947,
|
||||||
|
61122,88.5,259426,2325,1456,108632,1948,
|
||||||
|
60171,88.2,258054,3682,1616,109773,1949,
|
||||||
|
61187,89.5,284599,3351,1650,110929,1950,
|
||||||
|
63221,96.2,328975,2099,3099,112075,1951,
|
||||||
|
63639,98.1,346999,1932,3594,113270,1952,
|
||||||
|
64989,99.0,365385,1870,3547,115094,1953,
|
||||||
|
63761,100.0,363112,3578,3350,116219,1954,
|
||||||
|
66019,101.2,397469,2904,3048,117388,1955,
|
||||||
|
67857,104.6,419180,2822,2857,118734,1956,
|
||||||
|
68169,108.4,442769,2936,2798,120445,1957,
|
||||||
|
66513,110.8,444546,4681,2637,121950,1958,
|
||||||
|
68655,112.6,482704,3813,2552,123366,1959,
|
||||||
|
69564,114.2,502601,3931,2514,125368,1960,
|
||||||
|
69331,115.7,518173,4806,2572,127852,1961,
|
||||||
|
70551,116.9,554894,4007,2827,130081,1962),
|
||||||
|
nrow = 16, ncol = 7, byrow = TRUE)
|
||||||
|
|
||||||
|
expectedCovariance <- matrix(c(
|
||||||
|
12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
|
||||||
|
1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
|
||||||
|
36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
|
||||||
|
6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
|
||||||
|
343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
|
||||||
|
56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
|
||||||
|
1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
|
||||||
|
873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
|
||||||
|
1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
|
||||||
|
-115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
|
||||||
|
23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
|
||||||
|
4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
|
||||||
|
16240.93333333333, 5.092333333333334e+01, 470977.900000000,
|
||||||
|
2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667),
|
||||||
|
nrow = 7, ncol = 7, byrow = TRUE)
|
||||||
|
|
||||||
|
verifyCovariance(longley, expectedCovariance, "longley")
|
||||||
|
|
||||||
|
# Swiss Fertility
|
||||||
|
|
||||||
|
fertility <- matrix(c(80.2,17.0,15,12,9.96,
|
||||||
|
83.1,45.1,6,9,84.84,
|
||||||
|
92.5,39.7,5,5,93.40,
|
||||||
|
85.8,36.5,12,7,33.77,
|
||||||
|
76.9,43.5,17,15,5.16,
|
||||||
|
76.1,35.3,9,7,90.57,
|
||||||
|
83.8,70.2,16,7,92.85,
|
||||||
|
92.4,67.8,14,8,97.16,
|
||||||
|
82.4,53.3,12,7,97.67,
|
||||||
|
82.9,45.2,16,13,91.38,
|
||||||
|
87.1,64.5,14,6,98.61,
|
||||||
|
64.1,62.0,21,12,8.52,
|
||||||
|
66.9,67.5,14,7,2.27,
|
||||||
|
68.9,60.7,19,12,4.43,
|
||||||
|
61.7,69.3,22,5,2.82,
|
||||||
|
68.3,72.6,18,2,24.20,
|
||||||
|
71.7,34.0,17,8,3.30,
|
||||||
|
55.7,19.4,26,28,12.11,
|
||||||
|
54.3,15.2,31,20,2.15,
|
||||||
|
65.1,73.0,19,9,2.84,
|
||||||
|
65.5,59.8,22,10,5.23,
|
||||||
|
65.0,55.1,14,3,4.52,
|
||||||
|
56.6,50.9,22,12,15.14,
|
||||||
|
57.4,54.1,20,6,4.20,
|
||||||
|
72.5,71.2,12,1,2.40,
|
||||||
|
74.2,58.1,14,8,5.23,
|
||||||
|
72.0,63.5,6,3,2.56,
|
||||||
|
60.5,60.8,16,10,7.72,
|
||||||
|
58.3,26.8,25,19,18.46,
|
||||||
|
65.4,49.5,15,8,6.10,
|
||||||
|
75.5,85.9,3,2,99.71,
|
||||||
|
69.3,84.9,7,6,99.68,
|
||||||
|
77.3,89.7,5,2,100.00,
|
||||||
|
70.5,78.2,12,6,98.96,
|
||||||
|
79.4,64.9,7,3,98.22,
|
||||||
|
65.0,75.9,9,9,99.06,
|
||||||
|
92.2,84.6,3,3,99.46,
|
||||||
|
79.3,63.1,13,13,96.83,
|
||||||
|
70.4,38.4,26,12,5.62,
|
||||||
|
65.7,7.7,29,11,13.79,
|
||||||
|
72.7,16.7,22,13,11.22,
|
||||||
|
64.4,17.6,35,32,16.92,
|
||||||
|
77.6,37.6,15,7,4.97,
|
||||||
|
67.6,18.7,25,7,8.65,
|
||||||
|
35.0,1.2,37,53,42.34,
|
||||||
|
44.7,46.6,16,29,50.43,
|
||||||
|
42.8,27.7,22,29,58.33),
|
||||||
|
nrow = 47, ncol = 5, byrow = TRUE)
|
||||||
|
|
||||||
|
expectedCovariance <- matrix(c(
|
||||||
|
156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
|
||||||
|
100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
|
||||||
|
-64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
|
||||||
|
-79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
|
||||||
|
241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890),
|
||||||
|
nrow = 5, ncol = 5, byrow = TRUE)
|
||||||
|
|
||||||
|
verifyCovariance(fertility, expectedCovariance, "swiss fertility")
|
||||||
|
|
||||||
|
displayDashes(WIDTH)
|
|
@ -47,6 +47,9 @@ source("descriptiveTestCases")
|
||||||
# multiple regression
|
# multiple regression
|
||||||
source("multipleOLSRegressionTestCases")
|
source("multipleOLSRegressionTestCases")
|
||||||
|
|
||||||
|
# covariance
|
||||||
|
source("covarianceTestCases")
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# if output has been diverted, change it back
|
# if output has been diverted, change it back
|
||||||
if (sink.number()) {
|
if (sink.number()) {
|
||||||
|
|
|
@ -0,0 +1,225 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.math.stat.correlation;
|
||||||
|
|
||||||
|
import org.apache.commons.math.TestUtils;
|
||||||
|
import org.apache.commons.math.linear.RealMatrix;
|
||||||
|
import org.apache.commons.math.linear.RealMatrixImpl;
|
||||||
|
import org.apache.commons.math.stat.descriptive.moment.Variance;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class CovarianceTest extends TestCase {
|
||||||
|
|
||||||
|
protected final double[] longleyData = new double[] {
|
||||||
|
60323,83.0,234289,2356,1590,107608,1947,
|
||||||
|
61122,88.5,259426,2325,1456,108632,1948,
|
||||||
|
60171,88.2,258054,3682,1616,109773,1949,
|
||||||
|
61187,89.5,284599,3351,1650,110929,1950,
|
||||||
|
63221,96.2,328975,2099,3099,112075,1951,
|
||||||
|
63639,98.1,346999,1932,3594,113270,1952,
|
||||||
|
64989,99.0,365385,1870,3547,115094,1953,
|
||||||
|
63761,100.0,363112,3578,3350,116219,1954,
|
||||||
|
66019,101.2,397469,2904,3048,117388,1955,
|
||||||
|
67857,104.6,419180,2822,2857,118734,1956,
|
||||||
|
68169,108.4,442769,2936,2798,120445,1957,
|
||||||
|
66513,110.8,444546,4681,2637,121950,1958,
|
||||||
|
68655,112.6,482704,3813,2552,123366,1959,
|
||||||
|
69564,114.2,502601,3931,2514,125368,1960,
|
||||||
|
69331,115.7,518173,4806,2572,127852,1961,
|
||||||
|
70551,116.9,554894,4007,2827,130081,1962
|
||||||
|
};
|
||||||
|
|
||||||
|
protected final double[] swissData = new double[] {
|
||||||
|
80.2,17.0,15,12,9.96,
|
||||||
|
83.1,45.1,6,9,84.84,
|
||||||
|
92.5,39.7,5,5,93.40,
|
||||||
|
85.8,36.5,12,7,33.77,
|
||||||
|
76.9,43.5,17,15,5.16,
|
||||||
|
76.1,35.3,9,7,90.57,
|
||||||
|
83.8,70.2,16,7,92.85,
|
||||||
|
92.4,67.8,14,8,97.16,
|
||||||
|
82.4,53.3,12,7,97.67,
|
||||||
|
82.9,45.2,16,13,91.38,
|
||||||
|
87.1,64.5,14,6,98.61,
|
||||||
|
64.1,62.0,21,12,8.52,
|
||||||
|
66.9,67.5,14,7,2.27,
|
||||||
|
68.9,60.7,19,12,4.43,
|
||||||
|
61.7,69.3,22,5,2.82,
|
||||||
|
68.3,72.6,18,2,24.20,
|
||||||
|
71.7,34.0,17,8,3.30,
|
||||||
|
55.7,19.4,26,28,12.11,
|
||||||
|
54.3,15.2,31,20,2.15,
|
||||||
|
65.1,73.0,19,9,2.84,
|
||||||
|
65.5,59.8,22,10,5.23,
|
||||||
|
65.0,55.1,14,3,4.52,
|
||||||
|
56.6,50.9,22,12,15.14,
|
||||||
|
57.4,54.1,20,6,4.20,
|
||||||
|
72.5,71.2,12,1,2.40,
|
||||||
|
74.2,58.1,14,8,5.23,
|
||||||
|
72.0,63.5,6,3,2.56,
|
||||||
|
60.5,60.8,16,10,7.72,
|
||||||
|
58.3,26.8,25,19,18.46,
|
||||||
|
65.4,49.5,15,8,6.10,
|
||||||
|
75.5,85.9,3,2,99.71,
|
||||||
|
69.3,84.9,7,6,99.68,
|
||||||
|
77.3,89.7,5,2,100.00,
|
||||||
|
70.5,78.2,12,6,98.96,
|
||||||
|
79.4,64.9,7,3,98.22,
|
||||||
|
65.0,75.9,9,9,99.06,
|
||||||
|
92.2,84.6,3,3,99.46,
|
||||||
|
79.3,63.1,13,13,96.83,
|
||||||
|
70.4,38.4,26,12,5.62,
|
||||||
|
65.7,7.7,29,11,13.79,
|
||||||
|
72.7,16.7,22,13,11.22,
|
||||||
|
64.4,17.6,35,32,16.92,
|
||||||
|
77.6,37.6,15,7,4.97,
|
||||||
|
67.6,18.7,25,7,8.65,
|
||||||
|
35.0,1.2,37,53,42.34,
|
||||||
|
44.7,46.6,16,29,50.43,
|
||||||
|
42.8,27.7,22,29,58.33
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Longley dataset against R.
|
||||||
|
* Data Source: J. Longley (1967) "An Appraisal of Least Squares
|
||||||
|
* Programs for the Electronic Computer from the Point of View of the User"
|
||||||
|
* Journal of the American Statistical Association, vol. 62. September,
|
||||||
|
* pp. 819-841.
|
||||||
|
*
|
||||||
|
* Data are from NIST:
|
||||||
|
* http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
|
||||||
|
*/
|
||||||
|
public void testLongly() {
|
||||||
|
RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
|
||||||
|
RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
|
||||||
|
double[] rData = new double[] {
|
||||||
|
12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
|
||||||
|
1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
|
||||||
|
36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
|
||||||
|
6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
|
||||||
|
343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
|
||||||
|
56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
|
||||||
|
1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
|
||||||
|
873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
|
||||||
|
1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
|
||||||
|
-115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
|
||||||
|
23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
|
||||||
|
4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
|
||||||
|
16240.93333333333, 5.092333333333334e+01, 470977.900000000,
|
||||||
|
2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
|
||||||
|
};
|
||||||
|
|
||||||
|
TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 7, 7), covarianceMatrix, 10E-9);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test R Swiss fertility dataset against R.
|
||||||
|
* Data Source: R datasets package
|
||||||
|
*/
|
||||||
|
public void testSwissFertility() {
|
||||||
|
RealMatrix matrix = createRealMatrix(swissData, 47, 5);
|
||||||
|
RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
|
||||||
|
double[] rData = new double[] {
|
||||||
|
156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
|
||||||
|
100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
|
||||||
|
-64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
|
||||||
|
-79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
|
||||||
|
241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
|
||||||
|
};
|
||||||
|
|
||||||
|
TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 5, 5), covarianceMatrix, 10E-13);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constant column
|
||||||
|
*/
|
||||||
|
public void testConstant() {
|
||||||
|
double[] noVariance = new double[] {1, 1, 1, 1};
|
||||||
|
double[] values = new double[] {1, 2, 3, 4};
|
||||||
|
assertEquals(0d, new Covariance().covariance(noVariance, values, true), Double.MIN_VALUE);
|
||||||
|
assertEquals(0d, new Covariance().covariance(noVariance, noVariance, true), Double.MIN_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insufficient data
|
||||||
|
*/
|
||||||
|
public void testInsufficientData() {
|
||||||
|
double[] one = new double[] {1};
|
||||||
|
double[] two = new double[] {2};
|
||||||
|
try {
|
||||||
|
new Covariance().covariance(one, two, false);
|
||||||
|
fail("Expecting IllegalArgumentException");
|
||||||
|
} catch (IllegalArgumentException ex) {
|
||||||
|
// Expected
|
||||||
|
}
|
||||||
|
RealMatrix matrix = new RealMatrixImpl(new double[][] {{0},{1}});
|
||||||
|
try {
|
||||||
|
new Covariance(matrix);
|
||||||
|
fail("Expecting IllegalArgumentException");
|
||||||
|
} catch (IllegalArgumentException ex) {
|
||||||
|
// Expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify that diagonal entries are consistent with Variance computation and matrix matches
|
||||||
|
* column-by-column covariances
|
||||||
|
*/
|
||||||
|
public void testConsistency() {
|
||||||
|
RealMatrix matrix = createRealMatrix(swissData, 47, 5);
|
||||||
|
RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
|
||||||
|
|
||||||
|
// Variances on the diagonal
|
||||||
|
Variance variance = new Variance();
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
assertEquals(variance.evaluate(matrix.getColumn(i)), covarianceMatrix.getEntry(i,i), 10E-14);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Symmetry, column-consistency
|
||||||
|
assertEquals(covarianceMatrix.getEntry(2, 3),
|
||||||
|
new Covariance().covariance(matrix.getColumn(2), matrix.getColumn(3), true), 10E-14);
|
||||||
|
assertEquals(covarianceMatrix.getEntry(2, 3), covarianceMatrix.getEntry(3, 2), Double.MIN_VALUE);
|
||||||
|
|
||||||
|
// All columns same -> all entries = column variance
|
||||||
|
RealMatrix repeatedColumns = new RealMatrixImpl(47, 3);
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
repeatedColumns.setColumnMatrix(i, matrix.getColumnMatrix(0));
|
||||||
|
}
|
||||||
|
covarianceMatrix = new Covariance(repeatedColumns).getCovarianceMatrix();
|
||||||
|
double columnVariance = variance.evaluate(matrix.getColumn(0));
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
assertEquals(columnVariance, covarianceMatrix.getEntry(i, j), 10E-14);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
|
||||||
|
double[][] matrixData = new double[nRows][nCols];
|
||||||
|
int ptr = 0;
|
||||||
|
for (int i = 0; i < nRows; i++) {
|
||||||
|
System.arraycopy(data, ptr, matrixData[i], 0, nCols);
|
||||||
|
ptr += nCols;
|
||||||
|
}
|
||||||
|
return new RealMatrixImpl(matrixData);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue