added Mauro's patch to support multiple regression
there is still some work to do on this new feature JIRA: MATH-203 git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_0@657570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6ebc980cbc
commit
21872adbf8
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import org.apache.commons.math.linear.RealMatrix;
|
||||
import org.apache.commons.math.linear.RealMatrixImpl;
|
||||
|
||||
/**
|
||||
* Abstract base class for implementations of MultipleLinearRegression.
|
||||
*/
|
||||
public abstract class AbstractMultipleLinearRegression implements
|
||||
MultipleLinearRegression {
|
||||
|
||||
protected RealMatrix X;
|
||||
protected RealMatrix Y;
|
||||
|
||||
/**
|
||||
* Adds y sample data.
|
||||
*
|
||||
* @param y the [n,1] array representing the y sample
|
||||
*/
|
||||
protected void addYSampleData(double[] y){
|
||||
this.Y = new RealMatrixImpl(y);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds x sample data.
|
||||
*
|
||||
* @param x the [n,k] array representing the x sample
|
||||
*/
|
||||
protected void addXSampleData(double[][] x){
|
||||
this.X = new RealMatrixImpl(x);
|
||||
}
|
||||
|
||||
public double[] estimateRegressionParameters(){
|
||||
RealMatrix b = calculateBeta();
|
||||
return b.getColumn(0);
|
||||
}
|
||||
|
||||
public double[] estimateResiduals(){
|
||||
RealMatrix b = calculateBeta();
|
||||
RealMatrix e = Y.subtract(X.multiply(b));
|
||||
return e.getColumn(0);
|
||||
}
|
||||
|
||||
public double[][] estimateRegressionParametersVariance() {
|
||||
return calculateBetaVariance().getData();
|
||||
}
|
||||
|
||||
public double estimateRegressandVariance() {
|
||||
return calculateYVariance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the beta of multiple linear regression in matrix notation.
|
||||
*/
|
||||
protected abstract RealMatrix calculateBeta();
|
||||
|
||||
/**
|
||||
* Calculates the beta variance of multiple linear regression in matrix notation.
|
||||
*/
|
||||
protected abstract RealMatrix calculateBetaVariance();
|
||||
|
||||
/**
|
||||
* Calculates the Y variance of multiple linear regression.
|
||||
*/
|
||||
protected abstract double calculateYVariance();
|
||||
|
||||
/**
|
||||
* Calculates the residuals of multiple linear regression in matrix notation.
|
||||
* <pre>
|
||||
* u = y - X*b
|
||||
* </pre>
|
||||
*
|
||||
* @return The residuals [n,1] matrix
|
||||
*/
|
||||
protected RealMatrix calculateResiduals() {
|
||||
RealMatrix b = calculateBeta();
|
||||
return Y.subtract(X.multiply(b));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import org.apache.commons.math.linear.RealMatrix;
|
||||
import org.apache.commons.math.linear.RealMatrixImpl;
|
||||
|
||||
|
||||
/**
|
||||
* The GLS implementation of the multiple linear regression.
|
||||
*
|
||||
* GLS assumes a general covariance matrix Omega of the error
|
||||
* <pre>
|
||||
* u ~ N(0, Omega)
|
||||
* </pre>
|
||||
*
|
||||
* Estimated by GLS,
|
||||
* <pre>
|
||||
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
|
||||
* </pre>
|
||||
* whose variance is
|
||||
* <pre>
|
||||
* Var(b)=(X' Omega^-1 X)^-1
|
||||
* </pre>
|
||||
*/
|
||||
public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
|
||||
|
||||
private RealMatrix Omega;
|
||||
|
||||
|
||||
public void addData(double[] y, double[][] x, double[][] covariance) {
|
||||
addYSampleData(y);
|
||||
addXSampleData(x);
|
||||
addCovarianceData(covariance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the covariance data.
|
||||
*
|
||||
* @param omega the [n,n] array representing the covariance
|
||||
*/
|
||||
protected void addCovarianceData(double[][] omega){
|
||||
this.Omega = new RealMatrixImpl(omega);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates beta by GLS.
|
||||
* <pre>
|
||||
* b=(X' Omega^-1 X)^-1X'Omega^-1 y
|
||||
* </pre>
|
||||
*/
|
||||
protected RealMatrix calculateBeta() {
|
||||
RealMatrix OI = Omega.inverse();
|
||||
RealMatrix XT = X.transpose();
|
||||
RealMatrix XTOIX = XT.multiply(OI).multiply(X);
|
||||
return XTOIX.inverse().multiply(XT).multiply(OI).multiply(Y);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the variance on the beta by GLS.
|
||||
* <pre>
|
||||
* Var(b)=(X' Omega^-1 X)^-1
|
||||
* </pre>
|
||||
* @return The beta variance matrix
|
||||
*/
|
||||
protected RealMatrix calculateBetaVariance() {
|
||||
RealMatrix XTOIX = X.transpose().multiply(Omega.inverse()).multiply(X);
|
||||
return XTOIX.inverse();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the variance on the y by GLS.
|
||||
* <pre>
|
||||
* Var(y)=Tr(u' Omega^-1 u)/(n-k)
|
||||
* </pre>
|
||||
* @return The Y variance
|
||||
*/
|
||||
protected double calculateYVariance() {
|
||||
RealMatrix u = calculateResiduals();
|
||||
RealMatrix sse = u.transpose().multiply(Omega.inverse()).multiply(u);
|
||||
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
/**
|
||||
* The multiple linear regression can be represented in matrix-notation.
|
||||
* <pre>
|
||||
* y=X*b+u
|
||||
* </pre>
|
||||
* where y is an <code>n-vector</code> <b>regressand</b>, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
|
||||
* <b>regressors</b>, b is <code>k-vector</code> of <b>regression parameters</b> and <code>u</code> is an <code>n-vector</code>
|
||||
* of <b>error terms</b> or <b>residuals</b>.
|
||||
*
|
||||
* The notation is quite standard in literature,
|
||||
* cf eg <a href="http://www.econ.queensu.ca/ETM">Davidson and MacKinnon, Econometrics Theory and Methods, 2004</a>.
|
||||
*/
|
||||
public interface MultipleLinearRegression {
|
||||
|
||||
/**
|
||||
* Adds sample and covariance data.
|
||||
*
|
||||
* @param y the [n,1] array representing the y sample
|
||||
* @param x the [n,k] array representing x sample
|
||||
* @param covariance the [n,n] array representing the covariance matrix or <code>null</code> if not appropriate for the
|
||||
* specific implementation
|
||||
*/
|
||||
void addData(double[] y, double[][] x, double[][] covariance);
|
||||
|
||||
/**
|
||||
* Estimates the regression parameters b.
|
||||
*
|
||||
* @return The [k,1] array representing b
|
||||
*/
|
||||
double[] estimateRegressionParameters();
|
||||
|
||||
/**
|
||||
* Estimates the variance of the regression parameters, ie Var(b).
|
||||
*
|
||||
* @return The [k,k] array representing the variance of b
|
||||
*/
|
||||
double[][] estimateRegressionParametersVariance();
|
||||
|
||||
/**
|
||||
* Estimates the residuals, ie u = y - X*b.
|
||||
*
|
||||
* @return The [n,1] array representing the residuals
|
||||
*/
|
||||
double[] estimateResiduals();
|
||||
|
||||
/**
|
||||
* Returns the variance of the regressand, ie Var(y).
|
||||
*
|
||||
* @return The double representing the variance of y
|
||||
*/
|
||||
double estimateRegressandVariance();
|
||||
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import org.apache.commons.math.linear.RealMatrix;
|
||||
|
||||
|
||||
/**
|
||||
* The OLS implementation of the multiple linear regression.
|
||||
*
|
||||
* OLS assumes the covariance matrix of the error to be diagonal and with equal variance.
|
||||
* <pre>
|
||||
* u ~ N(0, sigma^2*I)
|
||||
* </pre>
|
||||
*
|
||||
* Estimated by OLS,
|
||||
* <pre>
|
||||
* b=(X'X)^-1X'y
|
||||
* </pre>
|
||||
* whose variance is
|
||||
* <pre>
|
||||
* Var(b)=MSE*(X'X)^-1, MSE=u'u/(n-k)
|
||||
* </pre>
|
||||
*/
|
||||
public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
|
||||
|
||||
|
||||
public void addData(double[] y, double[][] x, double[][] covariance) {
|
||||
addYSampleData(y);
|
||||
addXSampleData(x);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates beta by OLS.
|
||||
* <pre>
|
||||
* b=(X'X)^-1X'y
|
||||
* </pre>
|
||||
*/
|
||||
protected RealMatrix calculateBeta() {
|
||||
RealMatrix XTX = X.transpose().multiply(X);
|
||||
return XTX.inverse().multiply(X.transpose()).multiply(Y);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the variance on the beta by OLS.
|
||||
* <pre>
|
||||
* Var(b)=(X'X)^-1
|
||||
* </pre>
|
||||
* @return The beta variance
|
||||
*/
|
||||
protected RealMatrix calculateBetaVariance() {
|
||||
RealMatrix XTX = X.transpose().multiply(X);
|
||||
return XTX.inverse();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the variance on the Y by OLS.
|
||||
* <pre>
|
||||
* Var(y)=Tr(u'u)/(n-k)
|
||||
* </pre>
|
||||
* @return The Y variance
|
||||
*/
|
||||
protected double calculateYVariance() {
|
||||
RealMatrix u = calculateResiduals();
|
||||
RealMatrix sse = u.transpose().multiply(u);
|
||||
return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
|
||||
}
|
||||
|
||||
}
|
|
@ -39,6 +39,9 @@ The <action> type attribute can be add,update,fix,remove.
|
|||
</properties>
|
||||
<body>
|
||||
<release version="2.0" date="TBD" description="TBD">
|
||||
<action dev="luc" type="add" issue="MATH-203" due-to="Mauro Talevi">
|
||||
Added Mauro's patch to support multiple regression.
|
||||
</action>
|
||||
<action dev="luc" type="update" >
|
||||
Starting with version 2.0 of the library, the minimal version of the Java
|
||||
platform required to compile and use commons-math is Java 5. This version
|
||||
|
|
|
@ -69,7 +69,6 @@
|
|||
<dd>
|
||||
<ul>
|
||||
<li>More inference methods</li>
|
||||
<li>Multiple regression</li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt>Linear Algebra</dt>
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public abstract class AbstractMultipleLinearRegressionTest {
|
||||
|
||||
private MultipleLinearRegression regression;
|
||||
|
||||
@Before
|
||||
public void setUp(){
|
||||
regression = createRegression();
|
||||
}
|
||||
|
||||
protected abstract MultipleLinearRegression createRegression();
|
||||
|
||||
protected abstract int getNumberOfRegressors();
|
||||
|
||||
protected abstract int getSampleSize();
|
||||
|
||||
@Test
|
||||
public void canEstimateRegressionParameters(){
|
||||
double[] beta = regression.estimateRegressionParameters();
|
||||
assertEquals(getNumberOfRegressors(), beta.length);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canEstimateResiduals(){
|
||||
double[] e = regression.estimateResiduals();
|
||||
assertEquals(getSampleSize(), e.length);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canEstimateRegressionParametersVariance(){
|
||||
double[][] variance = regression.estimateRegressionParametersVariance();
|
||||
assertEquals(getNumberOfRegressors(), variance.length);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canEstimateRegressandVariance(){
|
||||
double variance = regression.estimateRegressandVariance();
|
||||
assertTrue(variance > 0.0);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import org.junit.Before;
|
||||
|
||||
public class GLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
|
||||
|
||||
private double[] y;
|
||||
private double[][] x;
|
||||
private double[][] omega;
|
||||
|
||||
@Before
|
||||
public void setUp(){
|
||||
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
|
||||
x = new double[6][];
|
||||
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
|
||||
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
|
||||
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
|
||||
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
|
||||
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
|
||||
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
|
||||
omega = new double[6][];
|
||||
omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
|
||||
omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
|
||||
omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
|
||||
omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
|
||||
omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
|
||||
omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
protected MultipleLinearRegression createRegression() {
|
||||
MultipleLinearRegression regression = new GLSMultipleLinearRegression();
|
||||
regression.addData(y, x, omega);
|
||||
return regression;
|
||||
}
|
||||
|
||||
protected int getNumberOfRegressors() {
|
||||
return x[0].length;
|
||||
}
|
||||
|
||||
protected int getSampleSize() {
|
||||
return y.length;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math.stat.regression;
|
||||
|
||||
import org.junit.Before;
|
||||
|
||||
public class OLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest {
|
||||
|
||||
private double[] y;
|
||||
private double[][] x;
|
||||
|
||||
@Before
|
||||
public void setUp(){
|
||||
y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
|
||||
x = new double[6][];
|
||||
x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
|
||||
x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
|
||||
x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
|
||||
x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
|
||||
x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
|
||||
x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
protected MultipleLinearRegression createRegression() {
|
||||
MultipleLinearRegression regression = new OLSMultipleLinearRegression();
|
||||
regression.addData(y, x, null);
|
||||
return regression;
|
||||
}
|
||||
|
||||
protected int getNumberOfRegressors() {
|
||||
return x[0].length;
|
||||
}
|
||||
|
||||
protected int getSampleSize() {
|
||||
return y.length;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue