diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c869571eeb8..9ed18a7ebde 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -90,6 +90,9 @@ Release 2.7.4 - UNRELEASED HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses. (Daryn Sharp via yliu) + HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. + (Erik Krogen via zhz) + Release 2.7.3 - 2016-08-25 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java index f104420a983..b5d99290450 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java @@ -99,6 +99,10 @@ public class MutableStat extends MutableMetric { /** * Add a number of samples and their sum to the running stat + * + * Note that although use of this method will preserve accurate mean values, + * large values for numSamples may result in inaccurate variance values due + * to the use of a single step of the Welford method for variance calculation. * @param numSamples number of samples * @param sum of the samples */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java index 589062a691c..be00a65bc99 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java @@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience; public class SampleStat { private final MinMax minmax = new MinMax(); private long numSamples = 0; - private double a0, a1, s0, s1; + private double a0, a1, s0, s1, total; /** * Construct a new running sample stat */ public SampleStat() { a0 = s0 = 0.0; + total = 0.0; } public void reset() { numSamples = 0; a0 = s0 = 0.0; + total = 0.0; minmax.reset(); } // We want to reuse the object, sometimes. void reset(long numSamples, double a0, double a1, double s0, double s1, - MinMax minmax) { + double total, MinMax minmax) { this.numSamples = numSamples; this.a0 = a0; this.a1 = a1; this.s0 = s0; this.s1 = s1; + this.total = total; this.minmax.reset(minmax); } @@ -58,7 +61,7 @@ public class SampleStat { * @param other the destination to hold our values */ public void copyTo(SampleStat other) { - other.reset(numSamples, a0, a1, s0, s1, minmax); + other.reset(numSamples, a0, a1, s0, s1, total, minmax); } /** @@ -80,6 +83,7 @@ public class SampleStat { */ public SampleStat add(long nSamples, double x) { numSamples += nSamples; + total += x; if (numSamples == 1) { a0 = a1 = x; @@ -102,11 +106,18 @@ public class SampleStat { return numSamples; } + /** + * @return the total of all samples added + */ + public double total() { + return total; + } + /** * @return the arithmetic mean of the samples */ public double mean() { - return numSamples > 0 ? a1 : 0.0; + return numSamples > 0 ? (total / numSamples) : 0.0; } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java index ed83000390e..9982d4eba7d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java @@ -115,6 +115,23 @@ public class TestMutableMetrics { assertGauge("BarAvgTime", 0.0, rb); } + /** + * Tests that when using {@link MutableStat#add(long, long)}, even with a high + * sample count, the mean does not lose accuracy. + */ + @Test public void testMutableStatWithBulkAdd() { + MetricsRecordBuilder rb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false); + + stat.add(1000, 1000); + stat.add(1000, 2000); + registry.snapshot(rb, false); + + assertCounter("TestNumOps", 2000L, rb); + assertGauge("TestAvgVal", 1.5, rb); + } + /** * Ensure that quantile estimates from {@link MutableQuantiles} are within * specified error bounds.