diff --git a/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
new file mode 100644
index 000000000..6e00c3603
--- /dev/null
+++ b/src/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealMatrixImpl;
+
+/**
+ * Abstract base class for implementations of MultipleLinearRegression.
+ * <p>
+ * Stores the sample data as matrices and derives the public estimates from
+ * the calculations that concrete subclasses supply.
+ */
+public abstract class AbstractMultipleLinearRegression implements
+        MultipleLinearRegression {
+
+    /** The x sample data, set by {@link #addXSampleData(double[][])}. */
+    protected RealMatrix X;
+
+    /** The y sample data, set by {@link #addYSampleData(double[])}. */
+    protected RealMatrix Y;
+
+    /**
+     * Adds y sample data.
+     *
+     * @param y the [n,1] array representing the y sample
+     */
+    protected void addYSampleData(double[] y){
+        this.Y = new RealMatrixImpl(y);
+    }
+
+    /**
+     * Adds x sample data.
+     *
+     * @param x the [n,k] array representing the x sample
+     */
+    protected void addXSampleData(double[][] x){
+        this.X = new RealMatrixImpl(x);
+    }
+
+    /**
+     * Estimates the regression parameters b.
+     *
+     * @return column 0 of the matrix returned by {@link #calculateBeta()}
+     */
+    public double[] estimateRegressionParameters(){
+        RealMatrix b = calculateBeta();
+        return b.getColumn(0);
+    }
+
+    /**
+     * Estimates the residuals, ie u = y - X*b.
+     *
+     * @return column 0 of the matrix returned by {@link #calculateResiduals()}
+     */
+    public double[] estimateResiduals(){
+        // Delegate so that the residual formula is written in exactly one place.
+        return calculateResiduals().getColumn(0);
+    }
+
+    /**
+     * Estimates the variance of the regression parameters, ie Var(b).
+     *
+     * @return the data of the matrix returned by {@link #calculateBetaVariance()}
+     */
+    public double[][] estimateRegressionParametersVariance() {
+        return calculateBetaVariance().getData();
+    }
+
+    /**
+     * Returns the variance of the regressand, ie Var(y).
+     *
+     * @return the value returned by {@link #calculateYVariance()}
+     */
+    public double estimateRegressandVariance() {
+        return calculateYVariance();
+    }
+
+    /**
+     * Calculates the beta of multiple linear regression in matrix notation.
+     *
+     * @return the beta matrix; its column 0 is used as the parameter estimate
+     */
+    protected abstract RealMatrix calculateBeta();
+
+    /**
+     * Calculates the beta variance of multiple linear regression in matrix notation.
+     *
+     * @return the beta variance matrix
+     */
+    protected abstract RealMatrix calculateBetaVariance();
+
+    /**
+     * Calculates the Y variance of multiple linear regression.
+     *
+     * @return the Y variance
+     */
+    protected abstract double calculateYVariance();
+
+    /**
+     * Calculates the residuals of multiple linear regression in matrix notation.
+     * <pre>
+     * u = y - X*b
+     * </pre>
+     *
+     * @return The residuals [n,1] matrix
+     */
+    protected RealMatrix calculateResiduals() {
+        RealMatrix b = calculateBeta();
+        return Y.subtract(X.multiply(b));
+    }
+
+}
diff --git a/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
new file mode 100644
index 000000000..5d6072540
--- /dev/null
+++ b/src/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealMatrixImpl;
+
+
+/**
+ * The GLS implementation of the multiple linear regression.
+ *
+ * GLS assumes a general covariance matrix Omega of the error
+ * <pre>
+ * u ~ N(0, Omega)
+ * </pre>
+ *
+ * Estimated by GLS,
+ * <pre>
+ * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+ * </pre>
+ * whose variance is
+ * <pre>
+ * Var(b)=(X' Omega^-1 X)^-1
+ * </pre>
+ */
+public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+    /** Covariance matrix of the error, set by {@link #addCovarianceData(double[][])}. */
+    private RealMatrix Omega;
+
+    /** Cached inverse of Omega; null until first needed or after Omega is replaced. */
+    private RealMatrix OmegaInverse;
+
+    /**
+     * Adds sample and covariance data.
+     *
+     * @param y the [n,1] array representing the y sample
+     * @param x the [n,k] array representing x sample
+     * @param covariance the [n,n] array representing the covariance matrix
+     */
+    public void addData(double[] y, double[][] x, double[][] covariance) {
+        addYSampleData(y);
+        addXSampleData(x);
+        addCovarianceData(covariance);
+    }
+
+    /**
+     * Add the covariance data.
+     *
+     * @param omega the [n,n] array representing the covariance
+     */
+    protected void addCovarianceData(double[][] omega){
+        this.Omega = new RealMatrixImpl(omega);
+        // Drop the cached inverse: it belonged to the previous Omega.
+        this.OmegaInverse = null;
+    }
+
+    /**
+     * Returns the inverse of Omega, computing it on first use and reusing it
+     * afterwards so that one fit does not invert the same matrix repeatedly.
+     *
+     * @return The inverse of the covariance matrix
+     */
+    private RealMatrix getOmegaInverse() {
+        if (OmegaInverse == null) {
+            OmegaInverse = Omega.inverse();
+        }
+        return OmegaInverse;
+    }
+
+    /**
+     * Calculates beta by GLS.
+     * <pre>
+     * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+     * </pre>
+     * @return The beta matrix
+     */
+    protected RealMatrix calculateBeta() {
+        RealMatrix OI = getOmegaInverse();
+        RealMatrix XT = X.transpose();
+        RealMatrix XTOIX = XT.multiply(OI).multiply(X);
+        return XTOIX.inverse().multiply(XT).multiply(OI).multiply(Y);
+    }
+
+    /**
+     * Calculates the variance on the beta by GLS.
+     * <pre>
+     * Var(b)=(X' Omega^-1 X)^-1
+     * </pre>
+     * @return The beta variance matrix
+     */
+    protected RealMatrix calculateBetaVariance() {
+        RealMatrix XTOIX = X.transpose().multiply(getOmegaInverse()).multiply(X);
+        return XTOIX.inverse();
+    }
+
+    /**
+     * Calculates the variance on the y by GLS.
+     * <pre>
+     * Var(y)=Tr(u' Omega^-1 u)/(n-k)
+     * </pre>
+     * @return The Y variance
+     */
+    protected double calculateYVariance() {
+        RealMatrix u = calculateResiduals();
+        RealMatrix sse = u.transpose().multiply(getOmegaInverse()).multiply(u);
+        return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
+    }
+
+}
diff --git a/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java
new file mode 100644
index 000000000..8da6be209
--- /dev/null
+++ b/src/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+/**
+ * The multiple linear regression can be represented in matrix-notation.
+ *
+ * <pre>y=X*b+u</pre>
+ *
+ * where y is an <code>n-vector</code> regressand, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
+ * regressors, b is a <code>k-vector</code> of regression parameters and <code>u</code> is an <code>n-vector</code>
+ * of error terms or residuals.
+ * The notation is quite standard in literature,
+ * cf. e.g. Davidson and MacKinnon, Econometric Theory and Methods, 2004.
+ */
+public interface MultipleLinearRegression {
+
+ /**
+ * Adds sample and covariance data.
+ *
+ * @param y the [n,1] array representing the y sample
+ * @param x the [n,k] array representing the x sample
+ * @param covariance the [n,n] array representing the covariance matrix, or
+ * <code>null</code> if not appropriate for the specific implementation
+ */
+ void addData(double[] y, double[][] x, double[][] covariance);
+
+ /**
+ * Estimates the regression parameters b.
+ *
+ * @return The [k,1] array representing b
+ */
+ double[] estimateRegressionParameters();
+
+ /**
+ * Estimates the variance of the regression parameters, ie Var(b).
+ *
+ * @return The [k,k] array representing the variance of b
+ */
+ double[][] estimateRegressionParametersVariance();
+
+ /**
+ * Estimates the residuals, ie u = y - X*b.
+ *
+ * @return The [n,1] array representing the residuals
+ */
+ double[] estimateResiduals();
+
+ /**
+ * Returns the variance of the regressand, ie Var(y).
+ *
+ * @return The double representing the variance of y
+ */
+ double estimateRegressandVariance();
+
+}
\ No newline at end of file
diff --git a/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java b/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
new file mode 100644
index 000000000..9b533c30e
--- /dev/null
+++ b/src/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.linear.RealMatrix;
+
+
+/**
+ * The OLS implementation of the multiple linear regression.
+ *
+ * OLS assumes the covariance matrix of the error to be diagonal and with equal variance.
+ * <pre>
+ * u ~ N(0, sigma^2*I)
+ * </pre>
+ *
+ * Estimated by OLS,
+ * <pre>
+ * b=(X'X)^-1X'y
+ * </pre>
+ * whose variance is
+ * <pre>
+ * Var(b)=MSE*(X'X)^-1, MSE=u'u/(n-k)
+ * </pre>
+ * Note that {@link #calculateBetaVariance()} returns only the (X'X)^-1 factor;
+ * MSE is what {@link #calculateYVariance()} computes.
+ */
+public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+    /**
+     * Adds sample data.
+     *
+     * @param y the [n,1] array representing the y sample
+     * @param x the [n,k] array representing x sample
+     * @param covariance not used by this implementation
+     */
+    public void addData(double[] y, double[][] x, double[][] covariance) {
+        addYSampleData(y);
+        addXSampleData(x);
+    }
+
+    /**
+     * Calculates beta by OLS.
+     * <pre>
+     * b=(X'X)^-1X'y
+     * </pre>
+     * @return The beta matrix
+     */
+    protected RealMatrix calculateBeta() {
+        // Transpose once and reuse it for both X'X and X'y.
+        RealMatrix XT = X.transpose();
+        RealMatrix XTX = XT.multiply(X);
+        return XTX.inverse().multiply(XT).multiply(Y);
+    }
+
+    /**
+     * Calculates the variance on the beta by OLS, without the MSE factor.
+     * <pre>
+     * (X'X)^-1
+     * </pre>
+     * @return The (X'X)^-1 matrix
+     */
+    protected RealMatrix calculateBetaVariance() {
+        RealMatrix XTX = X.transpose().multiply(X);
+        return XTX.inverse();
+    }
+
+    /**
+     * Calculates the variance on the Y by OLS.
+     * <pre>
+     * Var(y)=Tr(u'u)/(n-k)
+     * </pre>
+     * @return The Y variance
+     */
+    protected double calculateYVariance() {
+        RealMatrix u = calculateResiduals();
+        RealMatrix sse = u.transpose().multiply(u);
+        return sse.getTrace()/(X.getRowDimension()-X.getColumnDimension());
+    }
+
+}
diff --git a/src/site/xdoc/changes.xml b/src/site/xdoc/changes.xml
index c16d2c23e..7b8d68376 100644
--- a/src/site/xdoc/changes.xml
+++ b/src/site/xdoc/changes.xml
@@ -39,6 +39,9 @@ The