HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen.

(cherry picked from commit 3dbad5d823)
(cherry picked from commit b245e9ce2f)
(cherry picked from commit 384b7b71a3)
This commit is contained in:
Zhe Zhang 2016-11-07 16:08:10 -08:00
parent c1a6f4f2ee
commit 64657cd675
4 changed files with 39 additions and 4 deletions

View File

@ -90,6 +90,9 @@ Release 2.7.4 - UNRELEASED
HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses.
(Daryn Sharp via yliu)
HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used.
(Erik Krogen via zhz)
Release 2.7.3 - 2016-08-25
INCOMPATIBLE CHANGES

View File

@ -99,6 +99,10 @@ public class MutableStat extends MutableMetric {
/**
* Add a number of samples and their sum to the running stat
*
* Note that although use of this method will preserve accurate mean values,
* large values for numSamples may result in inaccurate variance values due
* to the use of a single step of the Welford method for variance calculation.
* @param numSamples number of samples
* @param sum of the samples
*/

View File

@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience;
public class SampleStat {
private final MinMax minmax = new MinMax();
private long numSamples = 0;
private double a0, a1, s0, s1;
private double a0, a1, s0, s1, total;
/**
* Construct a new running sample stat
*/
public SampleStat() {
a0 = s0 = 0.0;
total = 0.0;
}
public void reset() {
numSamples = 0;
a0 = s0 = 0.0;
total = 0.0;
minmax.reset();
}
// We want to reuse the object, sometimes.
void reset(long numSamples, double a0, double a1, double s0, double s1,
MinMax minmax) {
double total, MinMax minmax) {
this.numSamples = numSamples;
this.a0 = a0;
this.a1 = a1;
this.s0 = s0;
this.s1 = s1;
this.total = total;
this.minmax.reset(minmax);
}
@ -58,7 +61,7 @@ public class SampleStat {
* @param other the destination to hold our values
*/
public void copyTo(SampleStat other) {
other.reset(numSamples, a0, a1, s0, s1, minmax);
other.reset(numSamples, a0, a1, s0, s1, total, minmax);
}
/**
@ -80,6 +83,7 @@ public class SampleStat {
*/
public SampleStat add(long nSamples, double x) {
numSamples += nSamples;
total += x;
if (numSamples == 1) {
a0 = a1 = x;
@ -102,11 +106,18 @@ public class SampleStat {
return numSamples;
}
/**
* @return the total of all samples added
*/
public double total() {
return total;
}
/**
* @return the arithmetic mean of the samples
*/
public double mean() {
return numSamples > 0 ? a1 : 0.0;
return numSamples > 0 ? (total / numSamples) : 0.0;
}
/**

View File

@ -115,6 +115,23 @@ public class TestMutableMetrics {
assertGauge("BarAvgTime", 0.0, rb);
}
/**
* Tests that when using {@link MutableStat#add(long, long)}, even with a high
* sample count, the mean does not lose accuracy.
*/
@Test public void testMutableStatWithBulkAdd() {
MetricsRecordBuilder rb = mockMetricsRecordBuilder();
MetricsRegistry registry = new MetricsRegistry("test");
MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false);
stat.add(1000, 1000);
stat.add(1000, 2000);
registry.snapshot(rb, false);
assertCounter("TestNumOps", 2000L, rb);
assertGauge("TestAvgVal", 1.5, rb);
}
/**
* Ensure that quantile estimates from {@link MutableQuantiles} are within
* specified error bounds.