Added ModelSpecificationException and changed UpdatingRegression impls to use it. JIRA: MATH-607.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1150923 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2011-07-25 21:34:04 +00:00
parent d4283bbd1d
commit 54b0cce8fe
5 changed files with 107 additions and 38 deletions

View File

@ -137,6 +137,7 @@ public enum LocalizedFormats implements Localizable {
INVALID_ITERATIONS_LIMITS("invalid iteration limits: min={0}, max={1}"),
INVALID_MAX_ITERATIONS("bad value for maximum iterations number: {0}"),
INVALID_REGRESSION_ARRAY("input data array length = {0} does not match the number of observations = {1} and the number of regressors = {2}"),
INVALID_REGRESSION_OBSERVATION("length of regressor array = {0} does not match the number of variables = {1} in the model"),
INVALID_ROUNDING_METHOD("invalid rounding method {0}, valid methods: {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})"),
ITERATOR_EXHAUSTED("iterator exhausted"),
LCM_OVERFLOW_32_BITS("overflow: lcm({0}, {1}) is 2^31"),
@ -234,6 +235,7 @@ public enum LocalizedFormats implements Localizable {
NO_DENSITY_FOR_THIS_DISTRIBUTION("This distribution does not have a density function implemented"),
NO_FEASIBLE_SOLUTION("no feasible solution"),
NO_OPTIMUM_COMPUTED_YET("no optimum computed yet"), /* keep */
NO_REGRESSORS("Regression model must include at least one regressor"),
NO_RESULT_AVAILABLE("no result available"),
NO_SUCH_MATRIX_ENTRY("no entry at indices ({0}, {1}) in a {2}x{3} matrix"),
NULL_NOT_ALLOWED("null is not allowed"), /* keep */
@ -292,6 +294,7 @@ public enum LocalizedFormats implements Localizable {
SUBARRAY_ENDS_AFTER_ARRAY_END("subarray ends after array end"),
TOO_LARGE_CUTOFF_SINGULAR_VALUE("cutoff singular value is {0}, should be at most {1}"),
TOO_MANY_ELEMENTS_TO_DISCARD_FROM_ARRAY("cannot discard {0} elements from a {1} elements array"),
TOO_MANY_REGRESSORS("too many regressors ({0}) specified, only {1} in the model"),
TOO_SMALL_COST_RELATIVE_TOLERANCE("cost relative tolerance is too small ({0}), no further reduction in the sum of squares is possible"),
TOO_SMALL_INTEGRATION_INTERVAL("too small integration interval: length = {0}"),
TOO_SMALL_ORTHOGONALITY_TOLERANCE("orthogonality tolerance is too small ({0}), solution is orthogonal to the jacobian"),

View File

@ -18,8 +18,10 @@ package org.apache.commons.math.stat.regression;
import java.util.Arrays;
import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.exception.util.DummyLocalizable;
import org.apache.commons.math.exception.util.Localizable;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.util.FastMath;
import org.apache.commons.math.util.MathUtils;
@ -81,11 +83,6 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
private boolean hasIntercept;
/** zero tolerance */
private final double epsilon;
/** error message */
private String nvarsMessage = "Attempting to include more variables in regression than exist in model";
/** error message */
private String nobsVsNvarsMessage = "Number of observations not greater than the number of number of variables";
/**
* Set the default constructor to private access
* to prevent inadvertent instantiation
@ -122,7 +119,7 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
*/
public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance) {
if (numberOfVariables < 1) {
throw new IllegalArgumentException("NumberOfVariables must be greater than or equal to one");
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (includeConstant) {
this.nvars = numberOfVariables + 1;
@ -182,12 +179,15 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
* Adds an observation to the regression model
* @param x the array with regressor values
* @param y the value of dependent variable given these regressors
* @exception ModelSpecificationException if the length of {@code x} does not equal
* the number of independent variables in the model
*/
public void addObservation(final double[] x, final double y) {
if ((!this.hasIntercept && x.length != nvars) ||
(this.hasIntercept && x.length + 1 != nvars)) {
throw new IllegalArgumentException("Length of regressor list is less that numberOfVariables");
throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
x.length, nvars);
}
if (!this.hasIntercept) {
include(MathUtils.copyOf(x, x.length), 1.0, y);
@ -203,13 +203,27 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
}
/**
* Adds multiplier observations to the model
* Adds multiple observations to the model
* @param x observations on the regressors
* @param y observations on the regressand
* @throws ModelSpecificationException if {@code x} is not rectangular, does not match
* the length of {@code y} or does not contain sufficient data to estimate the model
*/
public void addObservations(double[][] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Lengths of x and y matrices must be equal");
if ((x == null) || (y == null) || (x.length != y.length)) {
throw new ModelSpecificationException(
LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
(x == null) ? 0 : x.length,
(y == null) ? 0 : y.length);
}
if (x.length == 0) { // Must be no y data either
throw new ModelSpecificationException(
LocalizedFormats.NO_DATA);
}
if (x[0].length + 1 > x.length) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
x.length, x[0].length);
}
for (int i = 0; i < x.length; i++) {
this.addObservation(x[i], y[i]);
@ -376,8 +390,12 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
*/
private double[] regcf(int nreq) {
int nextr;
if (nreq < 1 || nreq > this.nvars) {
throw new IllegalArgumentException("Number of regressors not correct");
if (nreq < 1) {
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (nreq > this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
}
if (!this.tol_set) {
tolset();
@ -911,10 +929,10 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
* Conducts a regression on the data in the model, using all regressors.
*
* @return RegressionResults the structure holding all regression results
* @exception MathException - thrown if number of observations is
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables
*/
public RegressionResults regress() throws MathException {
public RegressionResults regress() throws ModelSpecificationException {
return regress(this.nvars);
}
@ -924,18 +942,19 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
* @param numberOfRegressors how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return RegressionResults the structure holding all regression results
* @exception MathException - thrown if number of observations is
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables or number of regressors requested
* is greater than the regressors in the model
*/
public RegressionResults regress(int numberOfRegressors) throws MathException{
public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
if (this.nobs <= numberOfRegressors) {
Localizable outMsg = new DummyLocalizable(nobsVsNvarsMessage);
throw new MathException(outMsg, (Object) null);
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, numberOfRegressors);
}
if( numberOfRegressors > this.nvars ){
Localizable outMsg = new DummyLocalizable(nvarsMessage);
throw new MathException(outMsg, (Object) null);
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
}
this.tolset();
@ -1008,28 +1027,27 @@ public class MillerUpdatingRegression implements UpdatingMultipleLinearRegressio
*
* @param variablesToInclude array of variables to include in regression
* @return RegressionResults the structure holding all regression results
* @exception MathException - thrown if number of observations is
* less than the number of variables or
* number of regressors requested
* is greater than the regressors in the model or
* a regress or index in regressor array does not exist
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables, the number of regressors requested
* is greater than the regressors in the model or a regressor index in
* regressor array does not exist
*/
public RegressionResults regress(int[] variablesToInclude) throws MathException {
public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
if (variablesToInclude.length > this.nvars) {
Localizable outMsg = new DummyLocalizable(nvarsMessage);
throw new MathException(outMsg, (Object) null);
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
}
if (this.nobs <= this.nvars) {
Localizable outMsg = new DummyLocalizable(nobsVsNvarsMessage);
throw new MathException(outMsg, (Object) null);
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, this.nvars);
}
Arrays.sort(variablesToInclude);
int iExclude = 0;
for (int i = 0; i < variablesToInclude.length; i++) {
if (i >= this.nvars) {
Localizable outMsg = new DummyLocalizable("Requesting variable for inclusion " +
"which does not exist in data supplied");
throw new MathException(outMsg, (Object) null);
throw new ModelSpecificationException(
LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
}
if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
variablesToInclude[i] = -1;

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.regression;
import org.apache.commons.math.exception.MathIllegalArgumentException;
import org.apache.commons.math.exception.util.Localizable;
/**
 * Signals that a regression model has not been specified correctly.
 *
 * @since 3.0
 * @version $Id$
 */
public class ModelSpecificationException extends MathIllegalArgumentException {

    /** Serializable version Id. */
    private static final long serialVersionUID = 4206514456095401070L;

    /**
     * Creates the exception from a localizable message pattern and its arguments;
     * both are handed unchanged to the superclass constructor.
     *
     * @param pattern message pattern describing the specification error.
     * @param args arguments accompanying {@code pattern}.
     */
    public ModelSpecificationException(final Localizable pattern, final Object... args) {
        super(pattern, args);
    }
}

View File

@ -48,8 +48,10 @@ public interface UpdatingMultipleLinearRegression {
*
* @param x the independent variables which form the design matrix
* @param y the dependent or response variable
* @throws ModelSpecificationException if the length of {@code x} does not equal
* the number of independent variables in the model
*/
void addObservation(double[] x, double y);
void addObservation(double[] x, double y) throws ModelSpecificationException;
/**
* Adds a series of observations to the regression model. The lengths of
@ -58,6 +60,8 @@ public interface UpdatingMultipleLinearRegression {
* @param x a series of observations on the independent variables
* @param y a series of observations on the dependent variable
* The length of x and y must be the same
* @throws ModelSpecificationException if {@code x} is not rectangular, does not match
* the length of {@code y} or does not contain sufficient data to estimate the model
*/
void addObservations(double[][] x, double[] y);
@ -71,16 +75,16 @@ public interface UpdatingMultipleLinearRegression {
/**
* Performs a regression on data present in buffers and outputs a RegressionResults object
* @return RegressionResults acts as a container of regression output
* @throws MathException a wide variety of exception cases are possible, check message
* @throws ModelSpecificationException if the model is not correctly specified
*/
RegressionResults regress() throws MathException;
RegressionResults regress() throws ModelSpecificationException;
/**
* Performs a regression on data present in buffers including only regressors
* indexed in variablesToInclude and outputs a RegressionResults object
* @param variablesToInclude an array of indices of regressors to include
* @return RegressionResults acts as a container of regression output
* @throws MathException a wide variety of exception cases are possible, check message
* @throws ModelSpecificationException if the model is not correctly specified
*/
RegressionResults regress(int[] variablesToInclude) throws MathException;
RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException;
}

View File

@ -108,6 +108,7 @@ INVALID_INTERVAL_INITIAL_VALUE_PARAMETERS = param\u00e8tres de l''intervalle ini
INVALID_ITERATIONS_LIMITS = limites d''it\u00e9rations invalides : min = {0}, max = {1}
INVALID_MAX_ITERATIONS = valeur invalide pour le nombre maximal d''it\u00e9rations : {0}
INVALID_REGRESSION_ARRAY= longueur du tableau de donn\u00e9es = {0} ne correspond pas au nombre d''observations = {1} et le nombre de variables explicatives = {2}
INVALID_REGRESSION_OBSERVATION = longueur du tableau de variables explicatives ({0}) ne correspond pas au nombre de variables dans le mod\u00e8le ({1})
INVALID_ROUNDING_METHOD = m\u00e9thode d''arondi {0} invalide, m\u00e9thodes valides : {1} ({2}), {3} ({4}), {5} ({6}), {7} ({8}), {9} ({10}), {11} ({12}), {13} ({14}), {15} ({16})
ITERATOR_EXHAUSTED = it\u00e9ration achev\u00e9e
LCM_OVERFLOW_32_BITS = d\u00e9passement de capacit\u00e9 : le MCM de {0} et {1} vaut 2^31
@ -203,6 +204,7 @@ NO_DEGREES_OF_FREEDOM = aucun degr\u00e9 de libert\u00e9 ({0} mesures, {1} param
NO_DENSITY_FOR_THIS_DISTRIBUTION = La fonction de densit\u00e9 pour cette distribution n''a pas \u00e9t\u00e9 mis en \u0153uvre
NO_FEASIBLE_SOLUTION = aucune solution r\u00e9alisable
NO_OPTIMUM_COMPUTED_YET = aucun optimum n''a encore \u00e9t\u00e9 calcul\u00e9
NO_REGRESSORS = mod\u00e8le de r\u00e9gression doit inclure au moins une variable explicative
NO_RESULT_AVAILABLE = aucun r\u00e9sultat n''est disponible
NO_SUCH_MATRIX_ENTRY = pas d''\u00e9l\u00e9ment ({0}, {1}) dans une matrice {2}x{3}
NULL_NOT_ALLOWED = "null" n''est pas permis
@ -260,6 +262,7 @@ SINGULAR_MATRIX = matrice singuli\u00e8re
SUBARRAY_ENDS_AFTER_ARRAY_END = le sous-tableau se termine apr\u00e8s la fin du tableau
TOO_LARGE_CUTOFF_SINGULAR_VALUE = la valeur singuli\u00e8re de coupure vaut {0}, elle ne devrait pas d\u00e9passer {1}
TOO_MANY_ELEMENTS_TO_DISCARD_FROM_ARRAY = impossible d''enlever {0} \u00e9l\u00e9ments d''un tableau en contenant {1}
TOO_MANY_REGRESSORS = trop de variables explicatives sp\u00e9cifi\u00e9es ({0}), il n''y a que {1} dans le mod\u00e8le
TOO_SMALL_COST_RELATIVE_TOLERANCE = trop petite tol\u00e9rance relative sur le co\u00fbt ({0}), aucune r\u00e9duction de la somme des carr\u00e9s n''est possible
TOO_SMALL_INTEGRATION_INTERVAL = intervalle d''int\u00e9gration trop petit : {0}
TOO_SMALL_ORTHOGONALITY_TOLERANCE = trop petite tol\u00e9rance sur l''orthogonalit\u00e9 ({0}), la solution est orthogonale \u00e0 la jacobienne