Added a normalization feature to transform samples so they have zero mean and unit standard deviation
Jira: MATH-426 git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_X@1037332 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f27e3173c3
commit
22686ad09c
|
@ -18,6 +18,7 @@ package org.apache.commons.math.stat;
|
|||
|
||||
import org.apache.commons.math.MathRuntimeException;
|
||||
import org.apache.commons.math.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
|
||||
import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
|
||||
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
|
||||
import org.apache.commons.math.stat.descriptive.moment.Mean;
|
||||
|
@ -628,5 +629,34 @@ public final class StatUtils {
|
|||
}
|
||||
return (sum1 - (sum2 * sum2 / n)) / (n - 1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Normalize (standardize) the series, so in the end it is having a mean of 0 and a standard deviation of 1.
|
||||
*
|
||||
* @param sample sample to normalize
|
||||
* @return normalized (standardized) sample
|
||||
*/
|
||||
public static double[] normalize(final double[] sample) {
|
||||
DescriptiveStatistics stats = new DescriptiveStatistics();
|
||||
|
||||
// Add the data from the series to stats
|
||||
for (int i = 0; i < sample.length; i++) {
|
||||
stats.addValue(sample[i]);
|
||||
}
|
||||
|
||||
// Compute mean and standard deviation
|
||||
double mean = stats.getMean();
|
||||
double standardDeviation = stats.getStandardDeviation();
|
||||
|
||||
// initialize the standardizedSample, which has the same length as the sample
|
||||
double[] standardizedSample = new double[sample.length];
|
||||
|
||||
for (int i = 0; i < sample.length; i++) {
|
||||
// z = (x- mean)/standardDeviation
|
||||
standardizedSample[i] = (sample[i] - mean) / standardDeviation;
|
||||
}
|
||||
return standardizedSample;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -52,6 +52,9 @@ The <action> type attribute can be add,update,fix,remove.
|
|||
If the output is not quite correct, check for invisible trailing spaces!
|
||||
-->
|
||||
<release version="2.2" date="TBD" description="TBD">
|
||||
<action dev="luc" type="add" issue="MATH-426" due-to="Erik van Ingen">
|
||||
Added a normalization feature to transform samples so they have zero mean and unit standard deviation
|
||||
</action>
|
||||
<action dev="erans" type="add" issue="MATH-440">
|
||||
Created "MathUserException" class to convey cause of failure between
|
||||
layers of user code separated by a layer of Commons-Math code. Deprecated
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.commons.math.stat;
|
|||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.commons.math.TestUtils;
|
||||
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
|
||||
import org.apache.commons.math.util.FastMath;
|
||||
|
||||
/**
|
||||
|
@ -420,4 +421,48 @@ public final class StatUtilsTest extends TestCase {
|
|||
assertEquals(FastMath.exp(0.5 * StatUtils.sumLog(test, 0, 2)),
|
||||
StatUtils.geometricMean(test, 0, 2), Double.MIN_VALUE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Run the test with the values 50 and 100 and assume standardized values
|
||||
*/
|
||||
|
||||
public void testNormalize1() {
|
||||
double sample[] = { 50, 100 };
|
||||
double expectedSample[] = { -25 / Math.sqrt(1250), 25 / Math.sqrt(1250) };
|
||||
double[] out = StatUtils.normalize(sample);
|
||||
for (int i = 0; i < out.length; i++) {
|
||||
assertEquals(out[i], expectedSample[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Run with 77 random values, assuming that the outcome has a mean of 0 and a standard deviation of 1 with a
|
||||
* precision of 1E-10.
|
||||
*/
|
||||
|
||||
public void testNormalize2() {
|
||||
// create an sample with 77 values
|
||||
int length = 77;
|
||||
double sample[] = new double[length];
|
||||
for (int i = 0; i < length; i++) {
|
||||
sample[i] = Math.random();
|
||||
}
|
||||
// normalize this sample
|
||||
double standardizedSample[] = StatUtils.normalize(sample);
|
||||
|
||||
DescriptiveStatistics stats = new DescriptiveStatistics();
|
||||
// Add the data from the array
|
||||
for (int i = 0; i < length; i++) {
|
||||
stats.addValue(standardizedSample[i]);
|
||||
}
|
||||
// the calculations do have a limited precision
|
||||
double distance = 1E-10;
|
||||
// check the mean an standard deviation
|
||||
assertEquals(0.0, stats.getMean(), distance);
|
||||
assertEquals(1.0, stats.getStandardDeviation(), distance);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue