added Mauro's patch to support multiple regression

there is still some work to do on this new feature
JIRA: MATH-203

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_0@657570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Luc Maisonobe 2008-05-18 15:05:29 +00:00
parent 6ebc980cbc
commit 21872adbf8
9 changed files with 531 additions and 1 deletions

View File

@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
/**
* Abstract base class for implementations of MultipleLinearRegression.
*/
public abstract class AbstractMultipleLinearRegression implements
MultipleLinearRegression {
protected RealMatrix X;
protected RealMatrix Y;
/**
* Adds y sample data.
*
* @param y the [n,1] array representing the y sample
*/
protected void addYSampleData(double[] y){
this.Y = new RealMatrixImpl(y);
}
/**
* Adds x sample data.
*
* @param x the [n,k] array representing the x sample
*/
protected void addXSampleData(double[][] x){
this.X = new RealMatrixImpl(x);
}
public double[] estimateRegressionParameters(){
RealMatrix b = calculateBeta();
return b.getColumn(0);
}
public double[] estimateResiduals(){
RealMatrix b = calculateBeta();
RealMatrix e = Y.subtract(X.multiply(b));
return e.getColumn(0);
}
public double[][] estimateRegressionParametersVariance() {
return calculateBetaVariance().getData();
}
public double estimateRegressandVariance() {
return calculateYVariance();
}
/**
* Calculates the beta of multiple linear regression in matrix notation.
*/
protected abstract RealMatrix calculateBeta();
/**
* Calculates the beta variance of multiple linear regression in matrix notation.
*/
protected abstract RealMatrix calculateBetaVariance();
/**
* Calculates the Y variance of multiple linear regression.
*/
protected abstract double calculateYVariance();
/**
* Calculates the residuals of multiple linear regression in matrix notation.
* <pre>
* u = y - X*b
* </pre>
*
* @return The residuals [n,1] matrix
*/
protected RealMatrix calculateResiduals() {
RealMatrix b = calculateBeta();
return Y.subtract(X.multiply(b));
}
}

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
/**
* The GLS implementation of the multiple linear regression.
*
* GLS assumes a general covariance matrix Omega of the error
* <pre>
* u ~ N(0, Omega)
* </pre>
*
* Estimated by GLS,
* <pre>
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
* </pre>
* whose variance is
* <pre>
* Var(b)=(X' Omega^-1 X)^-1
* </pre>
*/
public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
private RealMatrix Omega;
public void addData(double[] y, double[][] x, double[][] covariance) {
addYSampleData(y);
addXSampleData(x);
addCovarianceData(covariance);
}
/**
* Add the covariance data.
*
* @param omega the [n,n] array representing the covariance
*/
protected void addCovarianceData(double[][] omega){
this.Omega = new RealMatrixImpl(omega);
}
/**
* Calculates beta by GLS.
* <pre>
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
* </pre>
*/
protected RealMatrix calculateBeta() {
RealMatrix OI = Omega.inverse();
RealMatrix XT = X.transpose();
RealMatrix XTOIX = XT.multiply(OI).multiply(X);
return XTOIX.inverse().multiply(XT).multiply(OI).multiply(Y);
}
/**
* Calculates the variance on the beta by GLS.
* <pre>
* Var(b)=(X' Omega^-1 X)^-1
* </pre>
* @return The beta variance matrix
*/
protected RealMatrix calculateBetaVariance() {
RealMatrix XTOIX = X.transpose().multiply(Omega.inverse()).multiply(X);
return XTOIX.inverse();
}
/**
* Calculates the variance on the y by GLS.
* <pre>
* Var(y)=Tr(u' Omega^-1 u)/(n-k)
* </pre>
* @return The Y variance
*/
protected double calculateYVariance() {
RealMatrix u = calculateResiduals();
RealMatrix sse = u.transpose().multiply(Omega.inverse()).multiply(u);
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
}
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
/**
* The multiple linear regression can be represented in matrix-notation.
* <pre>
* y=X*b+u
* </pre>
* where y is an <code>n-vector</code> <b>regressand</b>, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
* <b>regressors</b>, b is <code>k-vector</code> of <b>regression parameters</b> and <code>u</code> is an <code>n-vector</code>
* of <b>error terms</b> or <b>residuals</b>.
*
* The notation is quite standard in literature,
* cf eg <a href="http://www.econ.queensu.ca/ETM">Davidson and MacKinnon, Econometrics Theory and Methods, 2004</a>.
*/
public interface MultipleLinearRegression {
/**
* Adds sample and covariance data.
*
* @param y the [n,1] array representing the y sample
* @param x the [n,k] array representing x sample
* @param covariance the [n,n] array representing the covariance matrix or <code>null</code> if not appropriate for the
* specific implementation
*/
void addData(double[] y, double[][] x, double[][] covariance);
/**
* Estimates the regression parameters b.
*
* @return The [k,1] array representing b
*/
double[] estimateRegressionParameters();
/**
* Estimates the variance of the regression parameters, ie Var(b).
*
* @return The [k,k] array representing the variance of b
*/
double[][] estimateRegressionParametersVariance();
/**
* Estimates the residuals, ie u = y - X*b.
*
* @return The [n,1] array representing the residuals
*/
double[] estimateResiduals();
/**
* Returns the variance of the regressand, ie Var(y).
*
* @return The double representing the variance of y
*/
double estimateRegressandVariance();
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
/**
* The OLS implementation of the multiple linear regression.
*
* OLS assumes the covariance matrix of the error to be diagonal and with equal variance.
* <pre>
* u ~ N(0, sigma^2*I)
* </pre>
*
* Estimated by OLS,
* <pre>
* b=(X'X)^-1X'y
* </pre>
* whose variance is
* <pre>
* Var(b)=MSE*(X'X)^-1, MSE=u'u/(n-k)
* </pre>
*/
public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
public void addData(double[] y, double[][] x, double[][] covariance) {
addYSampleData(y);
addXSampleData(x);
}
/**
* Calculates beta by OLS.
* <pre>
* b=(X'X)^-1X'y
* </pre>
*/
protected RealMatrix calculateBeta() {
RealMatrix XTX = X.transpose().multiply(X);
return XTX.inverse().multiply(X.transpose()).multiply(Y);
}
/**
* Calculates the variance on the beta by OLS.
* <pre>
* Var(b)=(X'X)^-1
* </pre>
* @return The beta variance
*/
protected RealMatrix calculateBetaVariance() {
RealMatrix XTX = X.transpose().multiply(X);
return XTX.inverse();
}
/**
* Calculates the variance on the Y by OLS.
* <pre>
* Var(y)=Tr(u'u)/(n-k)
* </pre>
* @return The Y variance
*/
protected double calculateYVariance() {
RealMatrix u = calculateResiduals();
RealMatrix sse = u.transpose().multiply(u);
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
}
}

View File

@ -39,6 +39,9 @@ The <action> type attribute can be add,update,fix,remove.
</properties>
<body>
<release version="2.0" date="TBD" description="TBD">
<action dev="luc" type="add" issue="MATH-203" due-to="Mauro Talevi">
Added Mauro's patch to support multiple regression.
</action>
<action dev="luc" type="update" >
Starting with version 2.0 of the library, the minimal version of the Java
platform required to compile and use commons-math is Java 5. This version

View File

@ -69,7 +69,6 @@
<dd>
<ul>
<li>More inference methods</li>
<li>Multiple regression</li>
</ul>
</dd>
<dt>Linear Algebra</dt>

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
public abstract class AbstractMultipleLinearRegressionTest {
private MultipleLinearRegression regression;
@Before
public void setUp(){
regression = createRegression();
}
protected abstract MultipleLinearRegression createRegression();
protected abstract int getNumberOfRegressors();
protected abstract int getSampleSize();
@Test
public void canEstimateRegressionParameters(){
double[] beta = regression.estimateRegressionParameters();
assertEquals(getNumberOfRegressors(), beta.length);
}
@Test
public void canEstimateResiduals(){
double[] e = regression.estimateResiduals();
assertEquals(getSampleSize(), e.length);
}
@Test
public void canEstimateRegressionParametersVariance(){
double[][] variance = regression.estimateRegressionParametersVariance();
assertEquals(getNumberOfRegressors(), variance.length);
}
@Test
public void canEstimateRegressandVariance(){
double variance = regression.estimateRegressandVariance();
assertTrue(variance > 0.0);
}
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.junit.Before;
public class GLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
private double[] y;
private double[][] x;
private double[][] omega;
@Before
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
omega = new double[6][];
omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
super.setUp();
}
protected MultipleLinearRegression createRegression() {
MultipleLinearRegression regression = new GLSMultipleLinearRegression();
regression.addData(y, x, omega);
return regression;
}
protected int getNumberOfRegressors() {
return x[0].length;
}
protected int getSampleSize() {
return y.length;
}
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.junit.Before;
public class OLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
private double[] y;
private double[][] x;
@Before
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
super.setUp();
}
protected MultipleLinearRegression createRegression() {
MultipleLinearRegression regression = new OLSMultipleLinearRegression();
regression.addData(y, x, null);
return regression;
}
protected int getNumberOfRegressors() {
return x[0].length;
}
protected int getSampleSize() {
return y.length;
}
}