Only create final MatrixStatsResults on final reduction (#39205)
MatrixStatsResults is the "final" result object, and runs an additional computation in it's ctor to calculate covariance, etc. This means it should only run on the final reduction instead of on every reduce.
This commit is contained in:
parent
b9f8be6968
commit
8af0e7c4b6
|
@ -244,12 +244,15 @@ public class InternalMatrixStats extends InternalAggregation implements MatrixSt
|
|||
}
|
||||
|
||||
RunningStats runningStats = new RunningStats();
|
||||
for (int i=0; i < aggs.size(); ++i) {
|
||||
runningStats.merge(((InternalMatrixStats) aggs.get(i)).stats);
|
||||
for (InternalAggregation agg : aggs) {
|
||||
runningStats.merge(((InternalMatrixStats) agg).stats);
|
||||
}
|
||||
MatrixStatsResults results = new MatrixStatsResults(runningStats);
|
||||
|
||||
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
|
||||
if (reduceContext.isFinalReduce()) {
|
||||
MatrixStatsResults results = new MatrixStatsResults(runningStats);
|
||||
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
|
||||
}
|
||||
return new InternalMatrixStats(name, runningStats.docCount, runningStats, null, pipelineAggregators(), getMetaData());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -58,7 +58,6 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37587")
|
||||
public void testTwoFields() throws Exception {
|
||||
String fieldA = "a";
|
||||
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
|
||||
|
@ -89,8 +88,49 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase {
|
|||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
|
||||
.fields(Arrays.asList(fieldA, fieldB));
|
||||
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
|
||||
InternalMatrixStats stats = search(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
|
||||
// Since `search` doesn't do any reduction, and the InternalMatrixStats object will have a null `MatrixStatsResults`
|
||||
// object. That is created during the final reduction, which also does a final round of computations
|
||||
// So we have to create a MatrixStatsResults object here manually so that the final `compute()` is called
|
||||
multiPassStats.assertNearlyEqual(new MatrixStatsResults(stats.getStats()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testTwoFieldsReduce() throws Exception {
|
||||
String fieldA = "a";
|
||||
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
|
||||
ftA.setName(fieldA);
|
||||
String fieldB = "b";
|
||||
MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
|
||||
ftB.setName(fieldB);
|
||||
|
||||
try (Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
|
||||
|
||||
int numDocs = scaledRandomIntBetween(8192, 16384);
|
||||
Double[] fieldAValues = new Double[numDocs];
|
||||
Double[] fieldBValues = new Double[numDocs];
|
||||
for (int docId = 0; docId < numDocs; docId++) {
|
||||
Document document = new Document();
|
||||
fieldAValues[docId] = randomDouble();
|
||||
document.add(new SortedNumericDocValuesField(fieldA, NumericUtils.doubleToSortableLong(fieldAValues[docId])));
|
||||
|
||||
fieldBValues[docId] = randomDouble();
|
||||
document.add(new SortedNumericDocValuesField(fieldB, NumericUtils.doubleToSortableLong(fieldBValues[docId])));
|
||||
indexWriter.addDocument(document);
|
||||
}
|
||||
|
||||
MultiPassStats multiPassStats = new MultiPassStats(fieldA, fieldB);
|
||||
multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues));
|
||||
try (IndexReader reader = indexWriter.getReader()) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
|
||||
.fields(Arrays.asList(fieldA, fieldB));
|
||||
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
|
||||
// Unlike testTwoFields, `searchAndReduce` will execute reductions so the `MatrixStatsResults` object
|
||||
// will be populated and fully computed. We should use that value directly to test against
|
||||
multiPassStats.assertNearlyEqual(stats);
|
||||
assertTrue(MatrixAggregationInspectionHelper.hasValue(stats));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -136,6 +136,30 @@ class MultiPassStats {
|
|||
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
|
||||
}
|
||||
|
||||
void assertNearlyEqual(InternalMatrixStats stats) {
|
||||
assertEquals(count, stats.getDocCount());
|
||||
assertEquals(count, stats.getFieldCount(fieldAKey));
|
||||
assertEquals(count, stats.getFieldCount(fieldBKey));
|
||||
// means
|
||||
assertTrue(nearlyEqual(means.get(fieldAKey), stats.getMean(fieldAKey), 1e-7));
|
||||
assertTrue(nearlyEqual(means.get(fieldBKey), stats.getMean(fieldBKey), 1e-7));
|
||||
// variances
|
||||
assertTrue(nearlyEqual(variances.get(fieldAKey), stats.getVariance(fieldAKey), 1e-7));
|
||||
assertTrue(nearlyEqual(variances.get(fieldBKey), stats.getVariance(fieldBKey), 1e-7));
|
||||
// skewness (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
|
||||
assertTrue(nearlyEqual(skewness.get(fieldAKey), stats.getSkewness(fieldAKey), 1e-4));
|
||||
assertTrue(nearlyEqual(skewness.get(fieldBKey), stats.getSkewness(fieldBKey), 1e-4));
|
||||
// kurtosis (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
|
||||
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
|
||||
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
|
||||
// covariances
|
||||
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
|
||||
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
|
||||
// correlation
|
||||
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
|
||||
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
|
||||
}
|
||||
|
||||
private static boolean nearlyEqual(double a, double b, double epsilon) {
|
||||
final double absA = Math.abs(a);
|
||||
final double absB = Math.abs(b);
|
||||
|
|
Loading…
Reference in New Issue