From aad36b356e84d1fb65ae3383ea11368c9706cd4d Mon Sep 17 00:00:00 2001
From: Phil Steitz
Date: Mon, 20 Sep 2010 01:57:03 +0000
Subject: [PATCH] Fixed errors in multiple regression section. JIRA: MATH-407.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_X@998761 13f79535-47bb-0310-9956-ffa450edef68
---
src/site/xdoc/userguide/stat.xml | 93 ++++++++++++++++++--------------
1 file changed, 52 insertions(+), 41 deletions(-)
diff --git a/src/site/xdoc/userguide/stat.xml b/src/site/xdoc/userguide/stat.xml
index e63b56fe8..6ad146af4 100644
--- a/src/site/xdoc/userguide/stat.xml
+++ b/src/site/xdoc/userguide/stat.xml
@@ -473,37 +473,47 @@ System.out.println(regression.getSlopeStdErr());
-
- MultipleLinearRegression provides ordinary least squares regression
- with a generic multiple variable linear model, which in matrix notation
- can be expressed as:
+
+ OLSMultipleLinearRegression and
+
+ GLSMultipleLinearRegression provide least squares regression to fit the linear model:
- y=X*b+u
+ Y=X*b+u
- where y is an n-vector
regressand , X is a [n,k]
matrix whose k
columns are called
- regressors , b is k-vector
of regression parameters and u
is an n-vector
- of error terms or residuals . The notation is quite standard in literature,
- cf eg Davidson and MacKinnon, Econometrics Theory and Methods, 2004 .
+ where Y is an n-vector regressand , X is a [n,k] matrix whose k columns are called
+ regressors , b is k-vector of regression parameters and u is an n-vector
+ of error terms or residuals .
- Two implementations are provided:
- OLSMultipleLinearRegression and
+
+ OLSMultipleLinearRegression provides Ordinary Least Squares Regression, and
- GLSMultipleLinearRegression
+ GLSMultipleLinearRegression implements Generalized Least Squares. See the javadoc for these
+ classes for details on the algorithms and formulas used.
- Observations (x,y and covariance data matrices) can be added to the model via the addData(double[] y, double[][] x, double[][] covariance)
method.
- The observations are stored in memory until the next time the addData method is invoked.
+ Data for OLS models can be loaded in a single double[] array, consisting of concatenated rows of data, each containing
+ the regressand (Y) value, followed by regressor values; or using a double[][] array with rows corresponding to
+ observations. GLS models also require a double[][] array representing the covariance matrix of the error terms. See
+
+ AbstractMultipleLinearRegression#newSampleData(double[],int,int) ,
+
+ OLSMultipleLinearRegression#newSampleData(double[], double[][]) and
+
+ GLSMultipleLinearRegression#newSampleData(double[],double[][],double[][]) for details.
Usage Notes :
- Data is validated when invoking the addData(double[] y, double[][] x, double[][] covariance)
method and
- IllegalArgumentException
is thrown when inappropriate.
+ Data are validated when invoking any of the newSampleData, newXSampleData, newYSampleData or newCovarianceData methods and
+ IllegalArgumentException
is thrown when input data arrays do not have matching dimensions
+ or do not contain sufficient data to estimate the model.
- Only the GLS regressions require the covariance matrix, so in the OLS regression it is ignored and can be safely
- inputted as null
.
+ By default, regression models are estimated with intercept terms. In the notation above, this implies that the
+ X matrix contains an initial column identically equal to 1. X data supplied to the newXSampleData or newSampleData methods should not
+ include this column - the data loading methods will create it automatically. To estimate a model without an intercept
+ term, set the noIntercept
property to true.
@@ -511,44 +521,48 @@ System.out.println(regression.getSlopeStdErr());
OLS regression
- Instantiate an OLS regression object and load dataset
+ Instantiate an OLS regression object and load a dataset:
-MultipleLinearRegression regression = new OLSMultipleLinearRegression();
+OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
double[] y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
double[] x = new double[6][];
-x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
-x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
-x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
-x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
-x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
-x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
-regression.addData(y, x, null); // we don't need covariance
+x[0] = new double[]{0, 0, 0, 0, 0};
+x[1] = new double[]{2.0, 0, 0, 0, 0};
+x[2] = new double[]{0, 3.0, 0, 0, 0};
+x[3] = new double[]{0, 0, 4.0, 0, 0};
+x[4] = new double[]{0, 0, 0, 5.0, 0};
+x[5] = new double[]{0, 0, 0, 0, 6.0};
+regression.newSampleData(y, x);
- Estimate of regression values honours the MultipleLinearRegression
interface:
+ Get regression parameters and diagnostics:
-double[] beta = regression.estimateRegressionParameters();
+double[] beta = regression.estimateRegressionParameters();
double[] residuals = regression.estimateResiduals();
double[][] parametersVariance = regression.estimateRegressionParametersVariance();
double regressandVariance = regression.estimateRegressandVariance();
+
+double rSquared = regression.calculateRSquared();
+
+double sigma = regression.estimateRegressionStandardError();
GLS regression
- Instantiate an GLS regression object and load dataset
+ Instantiate a GLS regression object and load a dataset:
-MultipleLinearRegression regression = new GLSMultipleLinearRegression();
+GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
double[] y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
double[] x = new double[6][];
-x[0] = new double[]{1.0, 0, 0, 0, 0, 0};
-x[1] = new double[]{1.0, 2.0, 0, 0, 0, 0};
-x[2] = new double[]{1.0, 0, 3.0, 0, 0, 0};
-x[3] = new double[]{1.0, 0, 0, 4.0, 0, 0};
-x[4] = new double[]{1.0, 0, 0, 0, 5.0, 0};
-x[5] = new double[]{1.0, 0, 0, 0, 0, 6.0};
+x[0] = new double[]{0, 0, 0, 0, 0};
+x[1] = new double[]{2.0, 0, 0, 0, 0};
+x[2] = new double[]{0, 3.0, 0, 0, 0};
+x[3] = new double[]{0, 0, 4.0, 0, 0};
+x[4] = new double[]{0, 0, 0, 5.0, 0};
+x[5] = new double[]{0, 0, 0, 0, 6.0};
double[][] omega = new double[6][];
omega[0] = new double[]{1.1, 0, 0, 0, 0, 0};
omega[1] = new double[]{0, 2.2, 0, 0, 0, 0};
@@ -556,12 +570,9 @@ omega[2] = new double[]{0, 0, 3.3, 0, 0, 0};
omega[3] = new double[]{0, 0, 0, 4.4, 0, 0};
omega[4] = new double[]{0, 0, 0, 0, 5.5, 0};
omega[5] = new double[]{0, 0, 0, 0, 0, 6.6};
-regression.addData(y, x, omega); // we do need covariance
+regression.newSampleData(y, x, omega);
- Estimate of regression values honours the same MultipleLinearRegression
interface as
- the OLS regression.
-