added Mauro's patch to support multiple regression

there is still some work to do on this new feature
JIRA: MATH-203

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_0@657570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Luc Maisonobe 2008-05-18 15:05:29 +00:00
parent 6ebc980cbc
commit 21872adbf8
9 changed files with 531 additions and 1 deletions

View File

@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
/**
* Abstract base class for implementations of MultipleLinearRegression.
*/
public abstract class AbstractMultipleLinearRegression implements
MultipleLinearRegression {
protected RealMatrix X;
protected RealMatrix Y;
/**
* Adds y sample data.
*
* @param y the [n,1] array representing the y sample
*/
protected void addYSampleData(double[] y){
this.Y = new RealMatrixImpl(y);
}
/**
* Adds x sample data.
*
* @param x the [n,k] array representing the x sample
*/
protected void addXSampleData(double[][] x){
this.X = new RealMatrixImpl(x);
}
public double[] estimateRegressionParameters(){
RealMatrix b = calculateBeta();
return b.getColumn(0);
}
public double[] estimateResiduals(){
RealMatrix b = calculateBeta();
RealMatrix e = Y.subtract(X.multiply(b));
return e.getColumn(0);
}
public double[][] estimateRegressionParametersVariance() {
return calculateBetaVariance().getData();
}
public double estimateRegressandVariance() {
return calculateYVariance();
}
/**
* Calculates the beta of multiple linear regression in matrix notation.
*/
protected abstract RealMatrix calculateBeta();
/**
* Calculates the beta variance of multiple linear regression in matrix notation.
*/
protected abstract RealMatrix calculateBetaVariance();
/**
* Calculates the Y variance of multiple linear regression.
*/
protected abstract double calculateYVariance();
/**
* Calculates the residuals of multiple linear regression in matrix notation.
* <pre>
* u = y - X*b
* </pre>
*
* @return The residuals [n,1] matrix
*/
protected RealMatrix calculateResiduals() {
RealMatrix b = calculateBeta();
return Y.subtract(X.multiply(b));
}
}

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
/**
* The GLS implementation of the multiple linear regression.
*
* GLS assumes a general covariance matrix Omega of the error
* <pre>
* u ~ N(0, Omega)
* </pre>
*
* Estimated by GLS,
* <pre>
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
* </pre>
* whose variance is
* <pre>
* Var(b)=(X' Omega^-1 X)^-1
* </pre>
*/
public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
private RealMatrix Omega;
public void addData(double[] y, double[][] x, double[][] covariance) {
addYSampleData(y);
addXSampleData(x);
addCovarianceData(covariance);
}
/**
* Add the covariance data.
*
* @param omega the [n,n] array representing the covariance
*/
protected void addCovarianceData(double[][] omega){
this.Omega = new RealMatrixImpl(omega);
}
/**
* Calculates beta by GLS.
* <pre>
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
* </pre>
*/
protected RealMatrix calculateBeta() {
RealMatrix OI = Omega.inverse();
RealMatrix XT = X.transpose();
RealMatrix XTOIX = XT.multiply(OI).multiply(X);
return XTOIX.inverse().multiply(XT).multiply(OI).multiply(Y);
}
/**
* Calculates the variance on the beta by GLS.
* <pre>
* Var(b)=(X' Omega^-1 X)^-1
* </pre>
* @return The beta variance matrix
*/
protected RealMatrix calculateBetaVariance() {
RealMatrix XTOIX = X.transpose().multiply(Omega.inverse()).multiply(X);
return XTOIX.inverse();
}
/**
* Calculates the variance on the y by GLS.
* <pre>
* Var(y)=Tr(u' Omega^-1 u)/(n-k)
* </pre>
* @return The Y variance
*/
protected double calculateYVariance() {
RealMatrix u = calculateResiduals();
RealMatrix sse = u.transpose().multiply(Omega.inverse()).multiply(u);
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
}
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
/**
* The multiple linear regression can be represented in matrix-notation.
* <pre>
* y=X*b+u
* </pre>
* where y is an <code>n-vector</code> <b>regressand</b>, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
* <b>regressors</b>, b is <code>k-vector</code> of <b>regression parameters</b> and <code>u</code> is an <code>n-vector</code>
* of <b>error terms</b> or <b>residuals</b>.
*
* The notation is quite standard in literature,
* cf eg <a href="http://www.econ.queensu.ca/ETM">Davidson and MacKinnon, Econometrics Theory and Methods, 2004</a>.
*/
public interface MultipleLinearRegression {
/**
* Adds sample and covariance data.
*
* @param y the [n,1] array representing the y sample
* @param x the [n,k] array representing x sample
* @param covariance the [n,n] array representing the covariance matrix or <code>null</code> if not appropriate for the
* specific implementation
*/
void addData(double[] y, double[][] x, double[][] covariance);
/**
* Estimates the regression parameters b.
*
* @return The [k,1] array representing b
*/
double[] estimateRegressionParameters();
/**
* Estimates the variance of the regression parameters, ie Var(b).
*
* @return The [k,k] array representing the variance of b
*/
double[][] estimateRegressionParametersVariance();
/**
* Estimates the residuals, ie u = y - X*b.
*
* @return The [n,1] array representing the residuals
*/
double[] estimateResiduals();
/**
* Returns the variance of the regressand, ie Var(y).
*
* @return The double representing the variance of y
*/
double estimateRegressandVariance();
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.linear.RealMatrix;
/**
* The OLS implementation of the multiple linear regression.
*
* OLS assumes the covariance matrix of the error to be diagonal and with equal variance.
* <pre>
* u ~ N(0, sigma^2*I)
* </pre>
*
* Estimated by OLS,
* <pre>
* b=(X'X)^-1X'y
* </pre>
* whose variance is
* <pre>
* Var(b)=MSE*(X'X)^-1, MSE=u'u/(n-k)
* </pre>
*/
public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
public void addData(double[] y, double[][] x, double[][] covariance) {
addYSampleData(y);
addXSampleData(x);
}
/**
* Calculates beta by OLS.
* <pre>
* b=(X'X)^-1X'y
* </pre>
*/
protected RealMatrix calculateBeta() {
RealMatrix XTX = X.transpose().multiply(X);
return XTX.inverse().multiply(X.transpose()).multiply(Y);
}
/**
* Calculates the variance on the beta by OLS.
* <pre>
* Var(b)=(X'X)^-1
* </pre>
* @return The beta variance
*/
protected RealMatrix calculateBetaVariance() {
RealMatrix XTX = X.transpose().multiply(X);
return XTX.inverse();
}
/**
* Calculates the variance on the Y by OLS.
* <pre>
* Var(y)=Tr(u'u)/(n-k)
* </pre>
* @return The Y variance
*/
protected double calculateYVariance() {
RealMatrix u = calculateResiduals();
RealMatrix sse = u.transpose().multiply(u);
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
}
}

View File

@ -39,6 +39,9 @@ The <action> type attribute can be add,update,fix,remove.
</properties> </properties>
<body> <body>
<release version="2.0" date="TBD" description="TBD"> <release version="2.0" date="TBD" description="TBD">
<action dev="luc" type="add" issue="MATH-203" due-to="Mauro Talevi">
Added Mauro's patch to support multiple regression.
</action>
<action dev="luc" type="update" > <action dev="luc" type="update" >
Starting with version 2.0 of the library, the minimal version of the Java Starting with version 2.0 of the library, the minimal version of the Java
platform required to compile and use commons-math is Java 5. This version platform required to compile and use commons-math is Java 5. This version

View File

@ -69,7 +69,6 @@
<dd> <dd>
<ul> <ul>
<li>More inference methods</li> <li>More inference methods</li>
<li>Multiple regression</li>
</ul> </ul>
</dd> </dd>
<dt>Linear Algebra</dt> <dt>Linear Algebra</dt>

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.Test;
public abstract class AbstractMultipleLinearRegressionTest {
private MultipleLinearRegression regression;
@Before
public void setUp(){
regression = createRegression();
}
protected abstract MultipleLinearRegression createRegression();
protected abstract int getNumberOfRegressors();
protected abstract int getSampleSize();
@Test
public void canEstimateRegressionParameters(){
double[] beta = regression.estimateRegressionParameters();
assertEquals(getNumberOfRegressors(), beta.length);
}
@Test
public void canEstimateResiduals(){
double[] e = regression.estimateResiduals();
assertEquals(getSampleSize(), e.length);
}
@Test
public void canEstimateRegressionParametersVariance(){
double[][] variance = regression.estimateRegressionParametersVariance();
assertEquals(getNumberOfRegressors(), variance.length);
}
@Test
public void canEstimateRegressandVariance(){
double variance = regression.estimateRegressandVariance();
assertTrue(variance > 0.0);
}
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.junit.Before;
public class GLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
private double[] y;
private double[][] x;
private double[][] omega;
@Before
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
omega = new double[6][];
omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
super.setUp();
}
protected MultipleLinearRegression createRegression() {
MultipleLinearRegression regression = new GLSMultipleLinearRegression();
regression.addData(y, x, omega);
return regression;
}
protected int getNumberOfRegressors() {
return x[0].length;
}
protected int getSampleSize() {
return y.length;
}
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.junit.Before;
public class OLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
private double[] y;
private double[][] x;
@Before
public void setUp(){
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
x = new double[6][];
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
super.setUp();
}
protected MultipleLinearRegression createRegression() {
MultipleLinearRegression regression = new OLSMultipleLinearRegression();
regression.addData(y, x, null);
return regression;
}
protected int getNumberOfRegressors() {
return x[0].length;
}
protected int getSampleSize() {
return y.length;
}
}