HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen.
(cherry picked from commit3dbad5d823
) (cherry picked from commitb245e9ce2f
) (cherry picked from commit384b7b71a3
)
This commit is contained in:
parent
c1a6f4f2ee
commit
64657cd675
|
@ -90,6 +90,9 @@ Release 2.7.4 - UNRELEASED
|
||||||
HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses.
|
HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses.
|
||||||
(Daryn Sharp via yliu)
|
(Daryn Sharp via yliu)
|
||||||
|
|
||||||
|
HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used.
|
||||||
|
(Erik Krogen via zhz)
|
||||||
|
|
||||||
Release 2.7.3 - 2016-08-25
|
Release 2.7.3 - 2016-08-25
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -99,6 +99,10 @@ public class MutableStat extends MutableMetric {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a number of samples and their sum to the running stat
|
* Add a number of samples and their sum to the running stat
|
||||||
|
*
|
||||||
|
* Note that although use of this method will preserve accurate mean values,
|
||||||
|
* large values for numSamples may result in inaccurate variance values due
|
||||||
|
* to the use of a single step of the Welford method for variance calculation.
|
||||||
* @param numSamples number of samples
|
* @param numSamples number of samples
|
||||||
* @param sum of the samples
|
* @param sum of the samples
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
public class SampleStat {
|
public class SampleStat {
|
||||||
private final MinMax minmax = new MinMax();
|
private final MinMax minmax = new MinMax();
|
||||||
private long numSamples = 0;
|
private long numSamples = 0;
|
||||||
private double a0, a1, s0, s1;
|
private double a0, a1, s0, s1, total;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new running sample stat
|
* Construct a new running sample stat
|
||||||
*/
|
*/
|
||||||
public SampleStat() {
|
public SampleStat() {
|
||||||
a0 = s0 = 0.0;
|
a0 = s0 = 0.0;
|
||||||
|
total = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
public void reset() {
|
||||||
numSamples = 0;
|
numSamples = 0;
|
||||||
a0 = s0 = 0.0;
|
a0 = s0 = 0.0;
|
||||||
|
total = 0.0;
|
||||||
minmax.reset();
|
minmax.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// We want to reuse the object, sometimes.
|
// We want to reuse the object, sometimes.
|
||||||
void reset(long numSamples, double a0, double a1, double s0, double s1,
|
void reset(long numSamples, double a0, double a1, double s0, double s1,
|
||||||
MinMax minmax) {
|
double total, MinMax minmax) {
|
||||||
this.numSamples = numSamples;
|
this.numSamples = numSamples;
|
||||||
this.a0 = a0;
|
this.a0 = a0;
|
||||||
this.a1 = a1;
|
this.a1 = a1;
|
||||||
this.s0 = s0;
|
this.s0 = s0;
|
||||||
this.s1 = s1;
|
this.s1 = s1;
|
||||||
|
this.total = total;
|
||||||
this.minmax.reset(minmax);
|
this.minmax.reset(minmax);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +61,7 @@ public class SampleStat {
|
||||||
* @param other the destination to hold our values
|
* @param other the destination to hold our values
|
||||||
*/
|
*/
|
||||||
public void copyTo(SampleStat other) {
|
public void copyTo(SampleStat other) {
|
||||||
other.reset(numSamples, a0, a1, s0, s1, minmax);
|
other.reset(numSamples, a0, a1, s0, s1, total, minmax);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -80,6 +83,7 @@ public class SampleStat {
|
||||||
*/
|
*/
|
||||||
public SampleStat add(long nSamples, double x) {
|
public SampleStat add(long nSamples, double x) {
|
||||||
numSamples += nSamples;
|
numSamples += nSamples;
|
||||||
|
total += x;
|
||||||
|
|
||||||
if (numSamples == 1) {
|
if (numSamples == 1) {
|
||||||
a0 = a1 = x;
|
a0 = a1 = x;
|
||||||
|
@ -102,11 +106,18 @@ public class SampleStat {
|
||||||
return numSamples;
|
return numSamples;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the total of all samples added
|
||||||
|
*/
|
||||||
|
public double total() {
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the arithmetic mean of the samples
|
* @return the arithmetic mean of the samples
|
||||||
*/
|
*/
|
||||||
public double mean() {
|
public double mean() {
|
||||||
return numSamples > 0 ? a1 : 0.0;
|
return numSamples > 0 ? (total / numSamples) : 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -115,6 +115,23 @@ public class TestMutableMetrics {
|
||||||
assertGauge("BarAvgTime", 0.0, rb);
|
assertGauge("BarAvgTime", 0.0, rb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that when using {@link MutableStat#add(long, long)}, even with a high
|
||||||
|
* sample count, the mean does not lose accuracy.
|
||||||
|
*/
|
||||||
|
@Test public void testMutableStatWithBulkAdd() {
|
||||||
|
MetricsRecordBuilder rb = mockMetricsRecordBuilder();
|
||||||
|
MetricsRegistry registry = new MetricsRegistry("test");
|
||||||
|
MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false);
|
||||||
|
|
||||||
|
stat.add(1000, 1000);
|
||||||
|
stat.add(1000, 2000);
|
||||||
|
registry.snapshot(rb, false);
|
||||||
|
|
||||||
|
assertCounter("TestNumOps", 2000L, rb);
|
||||||
|
assertGauge("TestAvgVal", 1.5, rb);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure that quantile estimates from {@link MutableQuantiles} are within
|
* Ensure that quantile estimates from {@link MutableQuantiles} are within
|
||||||
* specified error bounds.
|
* specified error bounds.
|
||||||
|
|
Loading…
Reference in New Issue