diff --git a/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java new file mode 100644 index 000000000..6e00c3603 --- /dev/null +++ b/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.linear.RealMatrixImpl; + +/** + * Abstract base class for implementations of MultipleLinearRegression. + */ +public abstract class AbstractMultipleLinearRegression implements + MultipleLinearRegression { + + protected RealMatrix X; + protected RealMatrix Y; + + /** + * Adds y sample data. + * + * @param y the [n,1] array representing the y sample + */ + protected void addYSampleData(double[] y){ + this.Y = new RealMatrixImpl(y); + } + + /** + * Adds x sample data. 
+ * + * @param x the [n,k] array representing the x sample + */ + protected void addXSampleData(double[][] x){ + this.X = new RealMatrixImpl(x); + } + + public double[] estimateRegressionParameters(){ + RealMatrix b = calculateBeta(); + return b.getColumn(0); + } + + public double[] estimateResiduals(){ + RealMatrix b = calculateBeta(); + RealMatrix e = Y.subtract(X.multiply(b)); + return e.getColumn(0); + } + + public double[][] estimateRegressionParametersVariance() { + return calculateBetaVariance().getData(); + } + + public double estimateRegressandVariance() { + return calculateYVariance(); + } + + /** + * Calculates the beta of multiple linear regression in matrix notation. + */ + protected abstract RealMatrix calculateBeta(); + + /** + * Calculates the beta variance of multiple linear regression in matrix notation. + */ + protected abstract RealMatrix calculateBetaVariance(); + + /** + * Calculates the Y variance of multiple linear regression. + */ + protected abstract double calculateYVariance(); + + /** + * Calculates the residuals of multiple linear regression in matrix notation. + *
+     * u = y - X*b
+     * 
+ * + * @return The residuals [n,1] matrix + */ + protected RealMatrix calculateResiduals() { + RealMatrix b = calculateBeta(); + return Y.subtract(X.multiply(b)); + } + +} diff --git a/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java new file mode 100644 index 000000000..5d6072540 --- /dev/null +++ b/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.linear.RealMatrixImpl; + + +/** + * The GLS implementation of the multiple linear regression. + * + * GLS assumes a general covariance matrix Omega of the error + *
+ * u ~ N(0, Omega)
+ * 
+ * + * Estimated by GLS, + *
+ * b=(X' Omega^-1 X)^-1 X' Omega^-1 y
+ * 
+ * whose variance is + *
+ * Var(b)=(X' Omega^-1 X)^-1
+ * 
+ */ +public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression { + + private RealMatrix Omega; + + + public void addData(double[] y, double[][] x, double[][] covariance) { + addYSampleData(y); + addXSampleData(x); + addCovarianceData(covariance); + } + + /** + * Add the covariance data. + * + * @param omega the [n,n] array representing the covariance + */ + protected void addCovarianceData(double[][] omega){ + this.Omega = new RealMatrixImpl(omega); + } + + /** + * Calculates beta by GLS. + *
+     *  b=(X' Omega^-1 X)^-1 X' Omega^-1 y
+     * 
+ */ + protected RealMatrix calculateBeta() { + RealMatrix OI = Omega.inverse(); + RealMatrix XT = X.transpose(); + RealMatrix XTOIX = XT.multiply(OI).multiply(X); + return XTOIX.inverse().multiply(XT).multiply(OI).multiply(Y); + } + + /** + * Calculates the variance on the beta by GLS. + *
+     *  Var(b)=(X' Omega^-1 X)^-1
+     * 
+ * @return The beta variance matrix + */ + protected RealMatrix calculateBetaVariance() { + RealMatrix XTOIX = X.transpose().multiply(Omega.inverse()).multiply(X); + return XTOIX.inverse(); + } + + /** + * Calculates the variance on the y by GLS. + *
+     *  Var(y)=Tr(u' Omega^-1 u)/(n-k), defined only for n>k
+     * 
+ * @return The Y variance + */ + protected double calculateYVariance() { + RealMatrix u = calculateResiduals(); + RealMatrix sse = u.transpose().multiply(Omega.inverse()).multiply(u); + return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension()); + } + +} diff --git a/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java new file mode 100644 index 000000000..8da6be209 --- /dev/null +++ b/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +/** + * The multiple linear regression can be represented in matrix-notation. + *
+ *  y=X*b+u
+ * 
+ * where y is an n-vector regressand, X is a [n,k] matrix whose k columns are called + * regressors, b is k-vector of regression parameters and u is an n-vector + * of error terms or residuals. + * + * The notation is quite standard in literature, + * cf eg Davidson and MacKinnon, Econometrics Theory and Methods, 2004. + */ +public interface MultipleLinearRegression { + + /** + * Adds sample and covariance data. + * + * @param y the [n,1] array representing the y sample + * @param x the [n,k] array representing x sample + * @param covariance the [n,n] array representing the covariance matrix or null if not appropriate for the + * specific implementation + */ + void addData(double[] y, double[][] x, double[][] covariance); + + /** + * Estimates the regression parameters b. + * + * @return The [k,1] array representing b + */ + double[] estimateRegressionParameters(); + + /** + * Estimates the variance of the regression parameters, ie Var(b). + * + * @return The [k,k] array representing the variance of b + */ + double[][] estimateRegressionParametersVariance(); + + /** + * Estimates the residuals, ie u = y - X*b. + * + * @return The [n,1] array representing the residuals + */ + double[] estimateResiduals(); + + /** + * Returns the variance of the regressand, ie Var(y). + * + * @return The double representing the variance of y + */ + double estimateRegressandVariance(); + +} \ No newline at end of file diff --git a/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java new file mode 100644 index 000000000..9b533c30e --- /dev/null +++ b/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.apache.commons.math.linear.RealMatrix; + + +/** + * The OLS implementation of the multiple linear regression. + * + * OLS assumes the covariance matrix of the error to be diagonal and with equal variance. + *
+ * u ~ N(0, sigma^2*I)
+ * 
+ * + * Estimated by OLS, + *
+ * b=(X'X)^-1 X'y
+ * 
+ * whose variance is + *
+ * Var(b)=MSE*(X'X)^-1, MSE=u'u/(n-k)
+ * 
+ */ +public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression { + + + public void addData(double[] y, double[][] x, double[][] covariance) { + addYSampleData(y); + addXSampleData(x); + } + + /** + * Calculates beta by OLS. + *
+     * b=(X'X)^-1 X'y
+     * 
+ */ + protected RealMatrix calculateBeta() { + RealMatrix XTX = X.transpose().multiply(X); + return XTX.inverse().multiply(X.transpose()).multiply(Y); + } + + /** + * Calculates the variance on the beta by OLS. + *
+     *  Var(b)/MSE=(X'X)^-1, MSE=u'u/(n-k) (the MSE factor is not applied here)
+     * 
+ * @return The beta variance + */ + protected RealMatrix calculateBetaVariance() { + RealMatrix XTX = X.transpose().multiply(X); + return XTX.inverse(); + } + + + /** + * Calculates the variance on the Y by OLS. + *
+     *  Var(y)=Tr(u'u)/(n-k), defined only for n>k
+     * 
+ * @return The Y variance + */ + protected double calculateYVariance() { + RealMatrix u = calculateResiduals(); + RealMatrix sse = u.transpose().multiply(u); + return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension()); + } + +} diff --git a/src/site/xdoc/changes.xml b/src/site/xdoc/changes.xml index c16d2c23e..7b8d68376 100644 --- a/src/site/xdoc/changes.xml +++ b/src/site/xdoc/changes.xml @@ -39,6 +39,9 @@ The type attribute can be add,update,fix,remove. + + Added Mauro's patch to support multiple regression. + Starting with version 2.0 of the library, the minimal version of the Java platform required to compile and use commons-math is Java 5. This version diff --git a/src/site/xdoc/tasks.xml b/src/site/xdoc/tasks.xml index 6a884fd5d..abab6acd3 100644 --- a/src/site/xdoc/tasks.xml +++ b/src/site/xdoc/tasks.xml @@ -69,7 +69,6 @@
  • More inference methods
  • -
  • Multiple regression
Linear Algebra
diff --git a/src/test/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegressionTest.java b/src/test/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegressionTest.java new file mode 100644 index 000000000..ff91a71e7 --- /dev/null +++ b/src/test/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegressionTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math.stat.regression; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Before; +import org.junit.Test; + + +public abstract class AbstractMultipleLinearRegressionTest { + + private MultipleLinearRegression regression; + + @Before + public void setUp(){ + regression = createRegression(); + } + + protected abstract MultipleLinearRegression createRegression(); + + protected abstract int getNumberOfRegressors(); + + protected abstract int getSampleSize(); + + @Test + public void canEstimateRegressionParameters(){ + double[] beta = regression.estimateRegressionParameters(); + assertEquals(getNumberOfRegressors(), beta.length); + } + + @Test + public void canEstimateResiduals(){ + double[] e = regression.estimateResiduals(); + assertEquals(getSampleSize(), e.length); + } + + @Test + public void canEstimateRegressionParametersVariance(){ + double[][] variance = regression.estimateRegressionParametersVariance(); + assertEquals(getNumberOfRegressors(), variance.length); + } + + @Test + public void canEstimateRegressandVariance(){ + double variance = regression.estimateRegressandVariance(); + assertTrue(variance > 0.0); + } + +} diff --git a/src/test/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java b/src/test/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java new file mode 100644 index 000000000..3b4ac0969 --- /dev/null +++ b/src/test/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.junit.Before; + +public class GLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest { + + private double[] y; + private double[][] x; + private double[][] omega; + + @Before + public void setUp(){ + y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0}; + x = new double[6][]; + x[0] = new double[]{1.0, 0, 0, 0, 0, 0}; + x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0}; + x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0}; + x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0}; + x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0}; + x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0}; + omega = new double[6][]; + omega[0] = new double[]{1.0, 0, 0, 0, 0, 0}; + omega[1] = new double[]{0, 2.0, 0, 0, 0, 0}; + omega[2] = new double[]{0, 0, 3.0, 0, 0, 0}; + omega[3] = new double[]{0, 0, 0, 4.0, 0, 0}; + omega[4] = new double[]{0, 0, 0, 0, 5.0, 0}; + omega[5] = new double[]{0, 0, 0, 0, 0, 6.0}; + super.setUp(); + } + + protected MultipleLinearRegression createRegression() { + MultipleLinearRegression regression = new GLSMultipleLinearRegression(); + regression.addData(y, x, omega); + return regression; + } + + protected int getNumberOfRegressors() { + return x[0].length; + } + + protected int getSampleSize() { + return y.length; + } + +} diff --git a/src/test/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java b/src/test/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java new file mode 100644 index 000000000..76f4d96d8 --- /dev/null +++ 
b/src/test/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.junit.Before; + +public class OLSMultipleLinearRegressionTest extends AbstractMultipleLinearRegressionTest { + + private double[] y; + private double[][] x; + + @Before + public void setUp(){ + y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0}; + x = new double[6][]; + x[0] = new double[]{1.0, 0, 0, 0, 0, 0}; + x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0}; + x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0}; + x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0}; + x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0}; + x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0}; + super.setUp(); + } + + protected MultipleLinearRegression createRegression() { + MultipleLinearRegression regression = new OLSMultipleLinearRegression(); + regression.addData(y, x, null); + return regression; + } + + protected int getNumberOfRegressors() { + return x[0].length; + } + + protected int getSampleSize() { + return y.length; + } + +}