Added storeless covariance implementation contributed by Patrick Meyer. JIRA: MATH-449.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1160026 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2011-08-21 18:14:19 +00:00
parent 78c368544d
commit 22e68e3efe
4 changed files with 502 additions and 0 deletions

View File

@ -189,6 +189,9 @@
<contributor>
<name>Benjamin McCann</name>
</contributor>
<contributor>
<name>Patrick Meyer</name>
</contributor>
<contributor>
<name>J. Lewis Muir</name>
</contributor>

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.correlation;
import org.apache.commons.math.exception.MathIllegalArgumentException;
import org.apache.commons.math.exception.util.LocalizedFormats;
/**
 * Bivariate Covariance implementation that does not require input data to be
 * stored in memory.
 *
 * <p>Observations are supplied one pair at a time through
 * {@link #increment(double, double)}. Only the running means, the number of
 * pairs and the accumulated numerator of the covariance are retained.</p>
 *
 * @version $Id$
 * @since 3.0
 */
public class StorelessBivariateCovariance {

    /** Running mean of the x values added so far. */
    private double meanX = 0.0;

    /** Running mean of the y values added so far. */
    private double meanY = 0.0;

    /** Number of (x, y) pairs added so far (kept as a double). */
    private double n = 0;

    /** Accumulated sum of co-deviations; divided by n or n - 1 in getResult(). */
    private double covarianceNumerator = 0.0;

    /** Whether getResult() divides by n - 1 (true) or by n (false). */
    private boolean biasCorrected = true;

    /**
     * Creates an accumulator that computes the bias-corrected covariance
     * (divisor n - 1).
     */
    public StorelessBivariateCovariance() {
        this(true);
    }

    /**
     * Creates an accumulator with the given bias-correction setting.
     *
     * @param biasCorrected if true the result is divided by n - 1,
     * otherwise by n
     */
    public StorelessBivariateCovariance(boolean biasCorrected) {
        this.biasCorrected = biasCorrected;
    }

    /**
     * Adds one (x, y) observation and updates the running means and the
     * covariance numerator.
     *
     * @param x first coordinate of the observation
     * @param y second coordinate of the observation
     */
    public void increment(double x, double y) {
        n++;
        // Deviations are taken from the means as they were BEFORE this pair.
        final double devX = x - meanX;
        final double devY = y - meanY;
        meanX += devX / n;
        meanY += devY / n;
        // The (n - 1) / n weight makes the sum of these terms equal to the
        // sum of co-deviations around the updated means.
        covarianceNumerator += ((n - 1.0) / n) * devX * devY;
    }

    /**
     * Returns the number of observations added so far.
     *
     * @return number of (x, y) pairs passed to increment
     */
    public double getN() {
        return n;
    }

    /**
     * Returns the covariance of the observations added so far.
     *
     * @return the accumulated numerator divided by n - 1 when bias correction
     * is enabled, otherwise divided by n
     * @throws IllegalArgumentException if fewer than two observations have
     * been added (raised as a MathIllegalArgumentException)
     */
    public double getResult() throws IllegalArgumentException {
        if (n < 2) {
            throw new MathIllegalArgumentException(
                    LocalizedFormats.INSUFFICIENT_DIMENSION, n, 2);
        }
        final double divisor = biasCorrected ? n - 1d : n;
        return covarianceNumerator / divisor;
    }
}

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.correlation;
import org.apache.commons.math.exception.MathIllegalArgumentException;
import org.apache.commons.math.exception.MathUnsupportedOperationException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.RealMatrix;
/**
 * Covariance implementation that does not require input data to be
 * stored in memory.
 *
 * <p>One {@link StorelessBivariateCovariance} accumulator is kept for every
 * cell of the matrix. Cells can be updated individually with
 * {@link #incrementCovariance(int, int, double, double)} or all at once with
 * {@link #incrementRow(double[])}.</p>
 *
 * @version $Id$
 * @since 3.0
 */
public class StorelessCovariance extends Covariance {

    /** Per-cell accumulators, indexed [row][column]. */
    private StorelessBivariateCovariance[][] covMatrix = null;

    /** Number of rows of the accumulator matrix. */
    private int rowDimension = 1;

    /** Number of columns of the accumulator matrix. */
    private int colDimension = 1;

    /** Bias-correction setting handed to every accumulator created here. */
    private boolean biasCorrected = true;

    /**
     * Creates a bias-corrected accumulator matrix of the given size.
     *
     * @param rowDimension number of rows
     * @param colDimension number of columns
     */
    public StorelessCovariance(int rowDimension, int colDimension) {
        this(rowDimension, colDimension, true);
    }

    /**
     * Creates an accumulator matrix of the given size.
     *
     * @param rowDimension number of rows
     * @param colDimension number of columns
     * @param biasCorrected passed to each {@link StorelessBivariateCovariance}
     * created for the cells
     */
    public StorelessCovariance(int rowDimension, int colDimension, boolean biasCorrected) {
        this.rowDimension = rowDimension;
        this.colDimension = colDimension;
        this.biasCorrected = biasCorrected;
        covMatrix = new StorelessBivariateCovariance[rowDimension][colDimension];
        initializeMatrix();
    }

    /** Fills every cell with a fresh, empty accumulator. */
    private void initializeMatrix() {
        for (int i = 0; i < rowDimension; i++) {
            for (int j = 0; j < colDimension; j++) {
                covMatrix[i][j] = new StorelessBivariateCovariance(biasCorrected);
            }
        }
    }

    /**
     * Returns the accumulator stored in one cell. The returned object is the
     * stored instance itself, not a copy.
     *
     * @param xIndex row index of the cell
     * @param yIndex column index of the cell
     * @return the accumulator stored at (xIndex, yIndex)
     */
    public StorelessBivariateCovariance getCovariance(int xIndex, int yIndex) {
        return covMatrix[xIndex][yIndex];
    }

    /**
     * Replaces the accumulator stored in one cell.
     *
     * @param xIndex row index of the cell
     * @param yIndex column index of the cell
     * @param cov accumulator to store at (xIndex, yIndex)
     */
    public void setCovariance(int xIndex, int yIndex, StorelessBivariateCovariance cov) {
        covMatrix[xIndex][yIndex] = cov;
    }

    /**
     * Adds one (x, y) observation to the accumulator of a single cell.
     *
     * @param xIndex row index of the cell
     * @param yIndex column index of the cell
     * @param x first coordinate of the observation
     * @param y second coordinate of the observation
     */
    public void incrementCovariance(int xIndex, int yIndex, double x, double y) {
        covMatrix[xIndex][yIndex].increment(x, y);
    }

    /**
     * Adds one multivariate observation: for every pair (i, j) the cell
     * (i, j) is incremented with (rowData[i], rowData[j]).
     *
     * <p>The length of the row is validated against the column dimension
     * only, while its entries are used to index both axes of the accumulator
     * matrix. NOTE(review): this appears to assume a square matrix
     * (rowDimension == colDimension) - confirm with callers.</p>
     *
     * @param rowData one observation, with one value per variable
     * @throws IllegalArgumentException if the length of rowData differs from
     * the column dimension (raised as a MathIllegalArgumentException)
     */
    public void incrementRow(double[] rowData) throws IllegalArgumentException {
        int length = rowData.length;
        if (length != colDimension) {
            throw new MathIllegalArgumentException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, colDimension);
        }
        for (int i = 0; i < length; i++) {
            for (int j = 0; j < length; j++) {
                covMatrix[i][j].increment(rowData[i], rowData[j]);
            }
        }
    }

    /**
     * Builds the covariance matrix from the current state of every cell.
     *
     * @return a new rowDimension x colDimension matrix holding the result of
     * each cell's accumulator
     * @throws IllegalArgumentException if an accumulator rejects the request;
     * {@link StorelessBivariateCovariance#getResult()} does so when it has
     * received fewer than two observations
     */
    @Override
    public RealMatrix getCovarianceMatrix() throws IllegalArgumentException {
        RealMatrix matrix = new Array2DRowRealMatrix(rowDimension, colDimension);
        for (int i = 0; i < rowDimension; i++) {
            for (int j = 0; j < colDimension; j++) {
                matrix.setEntry(i, j, covMatrix[i][j].getResult());
            }
        }
        return matrix;
    }

    /**
     * Returns the current covariances as a plain two-dimensional array.
     *
     * @return a new rowDimension x colDimension array holding the result of
     * each cell's accumulator
     * @throws IllegalArgumentException if an accumulator rejects the request;
     * {@link StorelessBivariateCovariance#getResult()} does so when it has
     * received fewer than two observations
     */
    public double[][] getData() throws IllegalArgumentException {
        // The second dimension must be colDimension: the loop below writes
        // columns 0..colDimension-1 of every row.
        double[][] data = new double[rowDimension][colDimension];
        for (int i = 0; i < rowDimension; i++) {
            for (int j = 0; j < colDimension; j++) {
                data[i][j] = covMatrix[i][j].getResult();
            }
        }
        return data;
    }

    /**
     * This {@link Covariance} method is not supported by StorelessCovariance, since
     * the number of bivariate observations does not have to be the same for different
     * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
     *
     * @return nothing, this method always throws
     */
    @Override
    public int getN() {
        throw new MathUnsupportedOperationException();
    }
}

View File

@ -0,0 +1,305 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.correlation;
import org.apache.commons.math.TestUtils;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.RealMatrix;
import org.junit.Test;
/**
 * Tests for {@link StorelessCovariance} and {@link StorelessBivariateCovariance},
 * comparing the incrementally computed covariances with reference values
 * obtained from R.
 */
public class StorelessCovarianceTest {

    /** Longley dataset, row-major: 16 observations of 7 variables. */
    protected final double[] longleyData = new double[] {
        60323,83.0,234289,2356,1590,107608,1947,
        61122,88.5,259426,2325,1456,108632,1948,
        60171,88.2,258054,3682,1616,109773,1949,
        61187,89.5,284599,3351,1650,110929,1950,
        63221,96.2,328975,2099,3099,112075,1951,
        63639,98.1,346999,1932,3594,113270,1952,
        64989,99.0,365385,1870,3547,115094,1953,
        63761,100.0,363112,3578,3350,116219,1954,
        66019,101.2,397469,2904,3048,117388,1955,
        67857,104.6,419180,2822,2857,118734,1956,
        68169,108.4,442769,2936,2798,120445,1957,
        66513,110.8,444546,4681,2637,121950,1958,
        68655,112.6,482704,3813,2552,123366,1959,
        69564,114.2,502601,3931,2514,125368,1960,
        69331,115.7,518173,4806,2572,127852,1961,
        70551,116.9,554894,4007,2827,130081,1962
    };

    /** Swiss fertility dataset, row-major: 47 observations of 5 variables. */
    protected final double[] swissData = new double[] {
        80.2,17.0,15,12,9.96,
        83.1,45.1,6,9,84.84,
        92.5,39.7,5,5,93.40,
        85.8,36.5,12,7,33.77,
        76.9,43.5,17,15,5.16,
        76.1,35.3,9,7,90.57,
        83.8,70.2,16,7,92.85,
        92.4,67.8,14,8,97.16,
        82.4,53.3,12,7,97.67,
        82.9,45.2,16,13,91.38,
        87.1,64.5,14,6,98.61,
        64.1,62.0,21,12,8.52,
        66.9,67.5,14,7,2.27,
        68.9,60.7,19,12,4.43,
        61.7,69.3,22,5,2.82,
        68.3,72.6,18,2,24.20,
        71.7,34.0,17,8,3.30,
        55.7,19.4,26,28,12.11,
        54.3,15.2,31,20,2.15,
        65.1,73.0,19,9,2.84,
        65.5,59.8,22,10,5.23,
        65.0,55.1,14,3,4.52,
        56.6,50.9,22,12,15.14,
        57.4,54.1,20,6,4.20,
        72.5,71.2,12,1,2.40,
        74.2,58.1,14,8,5.23,
        72.0,63.5,6,3,2.56,
        60.5,60.8,16,10,7.72,
        58.3,26.8,25,19,18.46,
        65.4,49.5,15,8,6.10,
        75.5,85.9,3,2,99.71,
        69.3,84.9,7,6,99.68,
        77.3,89.7,5,2,100.00,
        70.5,78.2,12,6,98.96,
        79.4,64.9,7,3,98.22,
        65.0,75.9,9,9,99.06,
        92.2,84.6,3,3,99.46,
        79.3,63.1,13,13,96.83,
        70.4,38.4,26,12,5.62,
        65.7,7.7,29,11,13.79,
        72.7,16.7,22,13,11.22,
        64.4,17.6,35,32,16.92,
        77.6,37.6,15,7,4.97,
        67.6,18.7,25,7,8.65,
        35.0,1.2,37,53,42.34,
        44.7,46.6,16,29,50.43,
        42.8,27.7,22,29,58.33
    };

    /** First two columns of the Longley dataset, one observation per row. */
    protected final double[][] longleyDataSimple = {
        {60323, 83.0},
        {61122,88.5},
        {60171, 88.2},
        {61187, 89.5},
        {63221, 96.2},
        {63639, 98.1},
        {64989, 99.0},
        {63761, 100.0},
        {66019, 101.2},
        {67857, 104.6},
        {68169, 108.4},
        {66513, 110.8},
        {68655, 112.6},
        {69564, 114.2},
        {69331, 115.7},
        {70551, 116.9}
    };

    /** Reference 7 x 7 covariance matrix of the Longley data (from R), row-major. */
    private final double[] longleyExpectedCov = new double[] {
        12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
        1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
        36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
        6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
        343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
        56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
        1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
        873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
        1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
        -115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
        23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
        4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
        16240.93333333333, 5.092333333333334e+01, 470977.900000000,
        2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
    };

    /** Reference 5 x 5 covariance matrix of the Swiss data (from R), row-major. */
    private final double[] swissExpectedCov = new double[] {
        156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
        100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
        -64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
        -79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
        241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
    };

    /** Variance of the first Longley column, computed as cov(x, x). */
    @Test
    public void testLonglySimpleVar() {
        double rCov = 12333921.73333333246;
        StorelessBivariateCovariance cov = new StorelessBivariateCovariance();
        for (int i = 0; i < longleyDataSimple.length; i++) {
            cov.increment(longleyDataSimple[i][0], longleyDataSimple[i][0]);
        }
        TestUtils.assertEquals("simple covariance test", rCov, cov.getResult(), 10E-7);
    }

    /** Covariance between the first two Longley columns. */
    @Test
    public void testLonglySimpleCov() {
        double rCov = 36796.660000;
        StorelessBivariateCovariance cov = new StorelessBivariateCovariance();
        for (int i = 0; i < longleyDataSimple.length; i++) {
            cov.increment(longleyDataSimple[i][0], longleyDataSimple[i][1]);
        }
        TestUtils.assertEquals("simple covariance test", rCov, cov.getResult(), 10E-7);
    }

    /**
     * Test Longley dataset against R.
     * Data Source: J. Longley (1967) "An Appraisal of Least Squares
     * Programs for the Electronic Computer from the Point of View of the User"
     * Journal of the American Statistical Association, vol. 62. September,
     * pp. 819-841.
     *
     * Data are from NIST:
     * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
     */
    @Test
    public void testLonglyByRow() {
        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
        RealMatrix covarianceMatrix = covarianceByRow(matrix);
        TestUtils.assertEquals("covariance matrix",
                createRealMatrix(longleyExpectedCov, 7, 7), covarianceMatrix, 10E-7);
    }

    /**
     * Test R Swiss fertility dataset against R.
     * Data Source: R datasets package
     */
    @Test
    public void testSwissFertilityByRow() {
        RealMatrix matrix = createRealMatrix(swissData, 47, 5);
        RealMatrix covarianceMatrix = covarianceByRow(matrix);
        TestUtils.assertEquals("covariance matrix",
                createRealMatrix(swissExpectedCov, 5, 5), covarianceMatrix, 10E-13);
    }

    /**
     * Test Longley dataset against R.
     * Data Source: J. Longley (1967) "An Appraisal of Least Squares
     * Programs for the Electronic Computer from the Point of View of the User"
     * Journal of the American Statistical Association, vol. 62. September,
     * pp. 819-841.
     *
     * Data are from NIST:
     * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
     */
    @Test
    public void testLonglyByEntry() {
        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
        RealMatrix covarianceMatrix = covarianceByEntry(matrix);
        TestUtils.assertEquals("covariance matrix",
                createRealMatrix(longleyExpectedCov, 7, 7), covarianceMatrix, 10E-7);
    }

    /**
     * Test R Swiss fertility dataset against R.
     * Data Source: R datasets package
     */
    @Test
    public void testSwissFertilityByEntry() {
        RealMatrix matrix = createRealMatrix(swissData, 47, 5);
        RealMatrix covarianceMatrix = covarianceByEntry(matrix);
        TestUtils.assertEquals("covariance matrix",
                createRealMatrix(swissExpectedCov, 5, 5), covarianceMatrix, 10E-13);
    }

    /**
     * Feeds the observations to a StorelessCovariance one full row at a time.
     *
     * @param data observations, one per row
     * @return covariance matrix of the columns of data
     */
    private RealMatrix covarianceByRow(RealMatrix data) {
        int dim = data.getColumnDimension();
        StorelessCovariance covMatrix = new StorelessCovariance(dim, dim);
        for (int i = 0; i < data.getRowDimension(); i++) {
            covMatrix.incrementRow(data.getRow(i));
        }
        return covMatrix.getCovarianceMatrix();
    }

    /**
     * Feeds the observations to a StorelessCovariance one cell at a time,
     * updating cell (j, k) with the pair (data[i][j], data[i][k]).
     *
     * @param data observations, one per row
     * @return covariance matrix of the columns of data
     */
    private RealMatrix covarianceByEntry(RealMatrix data) {
        int row = data.getRowDimension();
        int col = data.getColumnDimension();
        StorelessCovariance covMatrix = new StorelessCovariance(col, col);
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                double x = data.getEntry(i, j);
                for (int k = 0; k < col; k++) {
                    double y = data.getEntry(i, k);
                    covMatrix.incrementCovariance(j, k, x, y);
                }
            }
        }
        return covMatrix.getCovarianceMatrix();
    }

    /**
     * Reshapes a row-major array into an nRows x nCols matrix.
     *
     * @param data row-major values, at least nRows * nCols long
     * @param nRows number of rows of the result
     * @param nCols number of columns of the result
     * @return matrix backed by a copy of the values
     */
    protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
        double[][] matrixData = new double[nRows][nCols];
        int ptr = 0;
        for (int i = 0; i < nRows; i++) {
            System.arraycopy(data, ptr, matrixData[i], 0, nCols);
            ptr += nCols;
        }
        return new Array2DRowRealMatrix(matrixData);
    }
}