mirror of
https://github.com/apache/commons-math.git
synced 2025-02-11 12:36:05 +00:00
added removeData methods to the SimpleRegression class.
This makes it possible to support regression calculations across a sliding window of (time-based) observations without having to recalculate for the entire window every time. JIRA: MATH-219 git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_0@687021 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
be008314f3
commit
07e312f0c3
@ -139,6 +139,39 @@ public class SimpleRegression implements Serializable {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes the observation (x,y) from the regression data set.
|
||||
* <p>
|
||||
* Mirrors the addData method. This method permits the use of
|
||||
* SimpleRegression instances in streaming mode where the regression
|
||||
* is applied to a sliding "window" of observations, however the caller is
|
||||
* responsible for maintaining the set of observations in the window.</p>
|
||||
*
|
||||
* The method has no effect if there are no points of data (i.e. n=0)
|
||||
*
|
||||
* @param x independent variable value
|
||||
* @param y dependent variable value
|
||||
*/
|
||||
public void removeData(double x, double y) {
|
||||
if (n > 0) {
|
||||
double dx = x - xbar;
|
||||
double dy = y - ybar;
|
||||
sumXX -= dx * dx * (double) n / (double) (n - 1.0);
|
||||
sumYY -= dy * dy * (double) n / (double) (n - 1.0);
|
||||
sumXY -= dx * dy * (double) n / (double) (n - 1.0);
|
||||
xbar -= dx / (double) (n - 1.0);
|
||||
ybar -= dy / (double) (n - 1.0);
|
||||
sumX -= x;
|
||||
sumY -= y;
|
||||
n--;
|
||||
|
||||
if (n > 2) {
|
||||
distribution.setDegreesOfFreedom(n - 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the observations represented by the elements in
|
||||
* <code>data</code>.
|
||||
@ -161,6 +194,26 @@ public class SimpleRegression implements Serializable {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes observations represented by the elements in <code>data</code>.
|
||||
* <p>
|
||||
* If the array is larger than the current n, only the first n elements are
|
||||
* processed. This method permits the use of SimpleRegression instances in
|
||||
* streaming mode where the regression is applied to a sliding "window" of
|
||||
* observations, however the caller is responsible for maintaining the set
|
||||
* of observations in the window.</p>
|
||||
* <p>
|
||||
* To remove all data, use <code>clear()</code>.</p>
|
||||
*
|
||||
* @param data array of observations to be removed
|
||||
*/
|
||||
public void removeData(double[][] data) {
|
||||
for (int i = 0; i < data.length && n > 0; i++) {
|
||||
removeData(data[i][0], data[i][1]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears all data from the model.
|
||||
*/
|
||||
|
@ -39,6 +39,11 @@ The <action> type attribute can be add,update,fix,remove.
|
||||
</properties>
|
||||
<body>
|
||||
<release version="2.0" date="TBD" description="TBD">
|
||||
<action dev="luc" type="add" issue="MATH-219" due-to="Andrew Berry">
|
||||
Added removeData methods for the SimpleRegression class. This makes it possible
|
||||
to support regression calculations across a sliding window of (time-based)
|
||||
observations without having to recalculate for the entire window every time.
|
||||
</action>
|
||||
<action dev="luc" type="add" due-to="Andreas Rieger">
|
||||
Support for one dimensional vectors has been added to the linear algebra
|
||||
package with a RealVector interface, a RealVectorImpl default implementation
|
||||
|
@ -62,6 +62,15 @@ public final class SimpleRegressionTest extends TestCase {
|
||||
{35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
|
||||
};
|
||||
|
||||
/*
|
||||
* Points to remove in the remove tests
|
||||
*/
|
||||
private double[][] removeSingle = {infData[1]};
|
||||
private double[][] removeMultiple = { infData[1], infData[2] };
|
||||
private double removeX = infData[0][0];
|
||||
private double removeY = infData[0][1];
|
||||
|
||||
|
||||
/*
|
||||
* Data with bad linear fit
|
||||
*/
|
||||
@ -275,4 +284,90 @@ public final class SimpleRegressionTest extends TestCase {
|
||||
}
|
||||
assertTrue(reg.getSumSquaredErrors() >= 0.0);
|
||||
}
|
||||
|
||||
// Test remove X,Y (single observation)
|
||||
public void testRemoveXY() throws Exception {
|
||||
// Create regression with inference data then remove to test
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(infData);
|
||||
regression.removeData(removeX, removeY);
|
||||
regression.addData(removeX, removeY);
|
||||
// Use the inference assertions to make sure that everything worked
|
||||
assertEquals("slope std err", 0.011448491,
|
||||
regression.getSlopeStdErr(), 1E-10);
|
||||
assertEquals("std err intercept", 0.286036932,
|
||||
regression.getInterceptStdErr(),1E-8);
|
||||
assertEquals("significance", 4.596e-07,
|
||||
regression.getSignificance(),1E-8);
|
||||
assertEquals("slope conf interval half-width", 0.0270713794287,
|
||||
regression.getSlopeConfidenceInterval(),1E-8);
|
||||
}
|
||||
|
||||
|
||||
// Test remove single observation in array
|
||||
public void testRemoveSingle() throws Exception {
|
||||
// Create regression with inference data then remove to test
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(infData);
|
||||
regression.removeData(removeSingle);
|
||||
regression.addData(removeSingle);
|
||||
// Use the inference assertions to make sure that everything worked
|
||||
assertEquals("slope std err", 0.011448491,
|
||||
regression.getSlopeStdErr(), 1E-10);
|
||||
assertEquals("std err intercept", 0.286036932,
|
||||
regression.getInterceptStdErr(),1E-8);
|
||||
assertEquals("significance", 4.596e-07,
|
||||
regression.getSignificance(),1E-8);
|
||||
assertEquals("slope conf interval half-width", 0.0270713794287,
|
||||
regression.getSlopeConfidenceInterval(),1E-8);
|
||||
}
|
||||
|
||||
// Test remove multiple observations
|
||||
public void testRemoveMultiple() throws Exception {
|
||||
// Create regression with inference data then remove to test
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(infData);
|
||||
regression.removeData(removeMultiple);
|
||||
regression.addData(removeMultiple);
|
||||
// Use the inference assertions to make sure that everything worked
|
||||
assertEquals("slope std err", 0.011448491,
|
||||
regression.getSlopeStdErr(), 1E-10);
|
||||
assertEquals("std err intercept", 0.286036932,
|
||||
regression.getInterceptStdErr(),1E-8);
|
||||
assertEquals("significance", 4.596e-07,
|
||||
regression.getSignificance(),1E-8);
|
||||
assertEquals("slope conf interval half-width", 0.0270713794287,
|
||||
regression.getSlopeConfidenceInterval(),1E-8);
|
||||
}
|
||||
|
||||
// Remove observation when empty
|
||||
public void testRemoveObsFromEmpty() {
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.removeData(removeX, removeY);
|
||||
assertEquals(regression.getN(), 0);
|
||||
}
|
||||
|
||||
// Remove single observation to empty
|
||||
public void testRemoveObsFromSingle() {
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(removeX, removeY);
|
||||
regression.removeData(removeX, removeY);
|
||||
assertEquals(regression.getN(), 0);
|
||||
}
|
||||
|
||||
// Remove multiple observations to empty
|
||||
public void testRemoveMultipleToEmpty() {
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(removeMultiple);
|
||||
regression.removeData(removeMultiple);
|
||||
assertEquals(regression.getN(), 0);
|
||||
}
|
||||
|
||||
// Remove multiple observations past empty (i.e. size of array > n)
|
||||
public void testRemoveMultiplePastEmpty() {
|
||||
SimpleRegression regression = new SimpleRegression();
|
||||
regression.addData(removeX, removeY);
|
||||
regression.removeData(removeMultiple);
|
||||
assertEquals(regression.getN(), 0);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user