aggs: Add a method that is invoked before the `getLeafCollector(...)` of children aggregators is invoked.

In the case of nested aggregator this allows it to push down buffered child docs down to children aggregator.
Before this was done as part of the `NestedAggregator#getLeafCollector(...)`, but by then the children aggregators
have already moved on to the next segment and this causes incorrect results to be produced.

Closes #27912
This commit is contained in:
Martijn van Groningen 2017-12-21 17:09:49 +01:00
parent cbd271e497
commit 791c5ddd7e
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
3 changed files with 136 additions and 3 deletions

View File

@ -162,10 +162,18 @@ public abstract class AggregatorBase extends Aggregator {
@Override
public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
preGetSubLeafCollectors();
final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
return getLeafCollector(ctx, sub);
}
/**
* Can be overridden by aggregator implementations that like the perform an operation before the leaf collectors
* of children aggregators are instantiated for the next segment.
*/
protected void preGetSubLeafCollectors() throws IOException {
}
/**
* Can be overridden by aggregator implementation to be called back when the collection phase starts.
*/

View File

@ -102,13 +102,21 @@ class NestedAggregator extends BucketsAggregator implements SingleBucketAggregat
}
};
} else {
doPostCollection();
return bufferingNestedLeafBucketCollector = new BufferingNestedLeafBucketCollector(sub, parentDocs, childDocs);
}
}
@Override
protected void preGetSubLeafCollectors() throws IOException {
processBufferedDocs();
}
@Override
protected void doPostCollection() throws IOException {
processBufferedDocs();
}
private void processBufferedDocs() throws IOException {
if (bufferingNestedLeafBucketCollector != null) {
bufferingNestedLeafBucketCollector.postCollect();
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.nested;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
@ -45,9 +46,13 @@ import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper;
import org.elasticsearch.index.mapper.UidFieldMapper;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.max.InternalMax;
@ -523,6 +528,118 @@ public class NestedAggregatorTests extends AggregatorTestCase {
}
}
public void testPreGetChildLeafCollectors() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
List<Document> documents = new ArrayList<>();
Document document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key1")));
document.add(new SortedDocValuesField("value", new BytesRef("a1")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key2")));
document.add(new SortedDocValuesField("value", new BytesRef("b1")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#1", UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "_doc", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(sequenceIDFields.primaryTerm);
documents.add(document);
iw.addDocuments(documents);
iw.commit();
documents.clear();
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key1")));
document.add(new SortedDocValuesField("value", new BytesRef("a2")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key2")));
document.add(new SortedDocValuesField("value", new BytesRef("b2")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#2", UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "_doc", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(sequenceIDFields.primaryTerm);
documents.add(document);
iw.addDocuments(documents);
iw.commit();
documents.clear();
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#3", UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key1")));
document.add(new SortedDocValuesField("value", new BytesRef("a3")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#3", UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedDocValuesField("key", new BytesRef("key2")));
document.add(new SortedDocValuesField("value", new BytesRef("b3")));
documents.add(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, "_doc#1", UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new Field(TypeFieldMapper.NAME, "_doc", TypeFieldMapper.Defaults.FIELD_TYPE));
document.add(sequenceIDFields.primaryTerm);
documents.add(document);
iw.addDocuments(documents);
iw.commit();
}
try (IndexReader indexReader = wrap(DirectoryReader.open(directory))) {
TermsAggregationBuilder valueBuilder = new TermsAggregationBuilder("value", ValueType.STRING).field("value");
TermsAggregationBuilder keyBuilder = new TermsAggregationBuilder("key", ValueType.STRING).field("key");
keyBuilder.subAggregation(valueBuilder);
NestedAggregationBuilder nestedBuilder = new NestedAggregationBuilder(NESTED_AGG, "nested_field");
nestedBuilder.subAggregation(keyBuilder);
FilterAggregationBuilder filterAggregationBuilder = new FilterAggregationBuilder("filterAgg", new MatchAllQueryBuilder());
filterAggregationBuilder.subAggregation(nestedBuilder);
MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType();
fieldType1.setName("key");
fieldType1.setHasDocValues(true);
MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType();
fieldType2.setName("value");
fieldType2.setHasDocValues(true);
Filter filter = search(newSearcher(indexReader, false, true),
Queries.newNonNestedFilter(Version.CURRENT), filterAggregationBuilder, fieldType1, fieldType2);
assertEquals("filterAgg", filter.getName());
assertEquals(3L, filter.getDocCount());
Nested nested = filter.getAggregations().get(NESTED_AGG);
assertEquals(6L, nested.getDocCount());
StringTerms keyAgg = nested.getAggregations().get("key");
assertEquals(2, keyAgg.getBuckets().size());
Terms.Bucket key1 = keyAgg.getBuckets().get(0);
assertEquals("key1", key1.getKey());
StringTerms valueAgg = key1.getAggregations().get("value");
assertEquals(3, valueAgg.getBuckets().size());
assertEquals("a1", valueAgg.getBuckets().get(0).getKey());
assertEquals("a2", valueAgg.getBuckets().get(1).getKey());
assertEquals("a3", valueAgg.getBuckets().get(2).getKey());
Terms.Bucket key2 = keyAgg.getBuckets().get(1);
assertEquals("key2", key2.getKey());
valueAgg = key2.getAggregations().get("value");
assertEquals(3, valueAgg.getBuckets().size());
assertEquals("b1", valueAgg.getBuckets().get(0).getKey());
assertEquals("b2", valueAgg.getBuckets().get(1).getKey());
assertEquals("b3", valueAgg.getBuckets().get(2).getKey());
}
}
}
private double generateMaxDocs(List<Document> documents, int numNestedDocs, int id, String path, String fieldName) {
return DoubleStream.of(generateDocuments(documents, numNestedDocs, id, path, fieldName))
.max().orElse(Double.NEGATIVE_INFINITY);