Added a normalization feature to transform samples so they have zero mean and unit standard deviation

Jira: MATH-426

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_X@1037332 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Luc Maisonobe 2010-11-20 21:31:21 +00:00
parent f27e3173c3
commit 22686ad09c
3 changed files with 78 additions and 0 deletions

View File

@ -18,6 +18,7 @@ package org.apache.commons.math.stat;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math.stat.descriptive.moment.Mean;
@ -628,5 +629,34 @@ public final class StatUtils {
}
return (sum1 - (sum2 * sum2 / n)) / (n - 1);
}
/**
 * Standardizes a sample: every value is shifted by the sample mean and then
 * divided by the sample standard deviation, both as reported by a
 * {@link DescriptiveStatistics} instance filled with the sample values.
 * The input array is left unmodified; a new array of the same length is returned.
 * <p>
 * No guard is applied to the divisor: if the reported standard deviation is
 * zero or NaN, the entries of the result follow ordinary floating-point
 * division rules (NaN or infinite values).
 * </p>
 *
 * @param sample sample to normalize
 * @return normalized (standardized) sample
 */
public static double[] normalize(final double[] sample) {
    // Feed every observation into the statistics accumulator
    final DescriptiveStatistics summary = new DescriptiveStatistics();
    for (final double observation : sample) {
        summary.addValue(observation);
    }

    // Location and scale used for the transformation
    final double center = summary.getMean();
    final double spread = summary.getStandardDeviation();

    // z-score of each observation: (x - mean) / standardDeviation
    final int count = sample.length;
    final double[] zScores = new double[count];
    int k = 0;
    while (k < count) {
        final double centered = sample[k] - center;
        zScores[k] = centered / spread;
        k++;
    }
    return zScores;
}
}

View File

@ -52,6 +52,9 @@ The <action> type attribute can be add,update,fix,remove.
If the output is not quite correct, check for invisible trailing spaces!
-->
<release version="2.2" date="TBD" description="TBD">
<action dev="luc" type="add" issue="MATH-426" due-to="Erik van Ingen">
Added a normalization feature to transform samples so they have zero mean and unit standard deviation
</action>
<action dev="erans" type="add" issue="MATH-440">
Created "MathUserException" class to convey cause of failure between
layers of user code separated by a layer of Commons-Math code. Deprecated

View File

@ -19,6 +19,7 @@ package org.apache.commons.math.stat;
import junit.framework.TestCase;
import org.apache.commons.math.TestUtils;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math.util.FastMath;
/**
@ -420,4 +421,48 @@ public final class StatUtilsTest extends TestCase {
assertEquals(FastMath.exp(0.5 * StatUtils.sumLog(test, 0, 2)),
StatUtils.geometricMean(test, 0, 2), Double.MIN_VALUE);
}
/**
 * Normalizes the two-value sample {50, 100} and compares each result with the
 * hand-computed standardized value. The expected values correspond to a mean
 * of 75 and a variance of 1250, i.e. -25 / sqrt(1250) and 25 / sqrt(1250).
 */
public void testNormalize1() {
    double sample[] = { 50, 100 };
    double expectedSample[] = { -25 / Math.sqrt(1250), 25 / Math.sqrt(1250) };
    double[] out = StatUtils.normalize(sample);
    // make sure no expected value is silently skipped by the loop below
    assertEquals(expectedSample.length, out.length);
    for (int i = 0; i < out.length; i++) {
        // floating-point results are compared with a tolerance (expected first, actual second);
        // the two-argument form would box both values and require bit-exact equality
        assertEquals(expectedSample[i], out[i], 1.0e-10);
    }
}
/**
 * Normalizes 77 pseudo-random values and checks that the outcome has a mean of 0
 * and a standard deviation of 1, both within a precision of 1E-10.
 * The values are drawn from a generator with a fixed seed so that any failure
 * can be reproduced exactly.
 */
public void testNormalize2() {
    // create a sample with 77 values
    int length = 77;
    double sample[] = new double[length];
    // fixed seed: the test data is identical on every run
    java.util.Random generator = new java.util.Random(426L);
    for (int i = 0; i < length; i++) {
        sample[i] = generator.nextDouble();
    }
    // normalize this sample
    double standardizedSample[] = StatUtils.normalize(sample);
    // the result must contain one value per input value
    assertEquals(length, standardizedSample.length);
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // add the normalized data to the statistics
    for (int i = 0; i < length; i++) {
        stats.addValue(standardizedSample[i]);
    }
    // the calculations do have a limited precision
    double distance = 1E-10;
    // check the mean and standard deviation
    assertEquals(0.0, stats.getMean(), distance);
    assertEquals(1.0, stats.getStandardDeviation(), distance);
}
}