diff --git a/modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/matrix/stats/InternalMatrixStats.java b/modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/matrix/stats/InternalMatrixStats.java index 8293844f4b6..7dcdff42671 100644 --- a/modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/matrix/stats/InternalMatrixStats.java +++ b/modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/matrix/stats/InternalMatrixStats.java @@ -244,12 +244,15 @@ public class InternalMatrixStats extends InternalAggregation implements MatrixSt } RunningStats runningStats = new RunningStats(); - for (int i=0; i < aggs.size(); ++i) { - runningStats.merge(((InternalMatrixStats) aggs.get(i)).stats); + for (InternalAggregation agg : aggs) { + runningStats.merge(((InternalMatrixStats) agg).stats); } - MatrixStatsResults results = new MatrixStatsResults(runningStats); - return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData()); + if (reduceContext.isFinalReduce()) { + MatrixStatsResults results = new MatrixStatsResults(runningStats); + return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData()); + } + return new InternalMatrixStats(name, runningStats.docCount, runningStats, null, pipelineAggregators(), getMetaData()); } @Override diff --git a/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MatrixStatsAggregatorTests.java b/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MatrixStatsAggregatorTests.java index 0512f3d5db3..44082b16def 100644 --- a/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MatrixStatsAggregatorTests.java +++ b/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MatrixStatsAggregatorTests.java @@ -58,7 +58,6 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37587") public void testTwoFields() throws Exception { String fieldA = "a"; MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE); @@ -89,8 +88,49 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase { IndexSearcher searcher = new IndexSearcher(reader); MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg") .fields(Arrays.asList(fieldA, fieldB)); - InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB); + InternalMatrixStats stats = search(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB); + // Since `search` doesn't do any reduction, and the InternalMatrixStats object will have a null `MatrixStatsResults` + // object. That is created during the final reduction, which also does a final round of computations + // So we have to create a MatrixStatsResults object here manually so that the final `compute()` is called multiPassStats.assertNearlyEqual(new MatrixStatsResults(stats.getStats())); + } + } + } + + public void testTwoFieldsReduce() throws Exception { + String fieldA = "a"; + MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE); + ftA.setName(fieldA); + String fieldB = "b"; + MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE); + ftB.setName(fieldB); + + try (Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + + int numDocs = scaledRandomIntBetween(8192, 16384); + Double[] fieldAValues = new Double[numDocs]; + Double[] fieldBValues = new Double[numDocs]; + for (int docId = 0; docId < numDocs; docId++) { + Document document = new Document(); + fieldAValues[docId] = randomDouble(); + document.add(new SortedNumericDocValuesField(fieldA, NumericUtils.doubleToSortableLong(fieldAValues[docId]))); + + fieldBValues[docId] = randomDouble(); + document.add(new SortedNumericDocValuesField(fieldB, NumericUtils.doubleToSortableLong(fieldBValues[docId]))); + indexWriter.addDocument(document); + } + + MultiPassStats multiPassStats = new MultiPassStats(fieldA, fieldB); + multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues)); + try (IndexReader reader = indexWriter.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg") + .fields(Arrays.asList(fieldA, fieldB)); + InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB); + // Unlike testTwoFields, `searchAndReduce` will execute reductions so the `MatrixStatsResults` object + // will be populated and fully computed. We should use that value directly to test against + multiPassStats.assertNearlyEqual(stats); assertTrue(MatrixAggregationInspectionHelper.hasValue(stats)); } } diff --git a/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MultiPassStats.java b/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MultiPassStats.java index b5a348f45eb..cd4ee3ee849 100644 --- a/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MultiPassStats.java +++ b/modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MultiPassStats.java @@ -136,6 +136,30 @@ class MultiPassStats { assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7)); } + void assertNearlyEqual(InternalMatrixStats stats) { + assertEquals(count, stats.getDocCount()); + assertEquals(count, stats.getFieldCount(fieldAKey)); + assertEquals(count, stats.getFieldCount(fieldBKey)); + // means + assertTrue(nearlyEqual(means.get(fieldAKey), stats.getMean(fieldAKey), 1e-7)); + assertTrue(nearlyEqual(means.get(fieldBKey), stats.getMean(fieldBKey), 1e-7)); + // variances + assertTrue(nearlyEqual(variances.get(fieldAKey), stats.getVariance(fieldAKey), 1e-7)); + assertTrue(nearlyEqual(variances.get(fieldBKey), stats.getVariance(fieldBKey), 1e-7)); + // skewness (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance) + assertTrue(nearlyEqual(skewness.get(fieldAKey), stats.getSkewness(fieldAKey), 1e-4)); + assertTrue(nearlyEqual(skewness.get(fieldBKey), stats.getSkewness(fieldBKey), 1e-4)); + // kurtosis (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance) + assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4)); + assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4)); + // covariances + assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7)); + assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7)); + // correlation + assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7)); + assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7)); + } + private static boolean nearlyEqual(double a, double b, double epsilon) { final double absA = Math.abs(a); final double absB = Math.abs(b);