Make `missing` on terms aggs work with all execution modes.

There are two bugs:
 - the 'global_ordinals_low_cardinality' mode requires a fielddata-based impl so
   that it can extract the segment to global ordinal mapping
 - the 'global_ordinals_hash' mode abusively casts to the values source to a
   fielddata-based impl while it is not needed

Closes #14882
This commit is contained in:
Adrien Grand 2016-01-04 11:13:17 +01:00
parent 1a47226d9a
commit c934f859c7
3 changed files with 29 additions and 20 deletions

View File

@ -267,9 +267,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private final LongHash bucketOrds; private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext,
Aggregator parent, SubAggCollectionMode collectionMode, Aggregator parent, SubAggCollectionMode collectionMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException { throws IOException {
super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode,
@ -341,7 +341,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private RandomAccessOrds segmentOrds; private RandomAccessOrds segmentOrds;
public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
Terms.Order order, Terms.Order order,
BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent,
SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
@ -411,11 +411,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
// This is the cleanest way I can think of so far // This is the cleanest way I can think of so far
GlobalOrdinalMapping mapping; GlobalOrdinalMapping mapping;
if (globalOrds instanceof GlobalOrdinalMapping) { if (globalOrds.getValueCount() == segmentOrds.getValueCount()) {
mapping = (GlobalOrdinalMapping) globalOrds;
} else {
assert globalOrds.getValueCount() == segmentOrds.getValueCount();
mapping = null; mapping = null;
} else {
mapping = (GlobalOrdinalMapping) globalOrds;
} }
for (long i = 1; i < segmentDocCounts.size(); i++) { for (long i = 1; i < segmentDocCounts.size(); i++) {
// We use set(...) here, because we need to reset the slow to 0. // We use set(...) here, because we need to reset the slow to 0.

View File

@ -94,7 +94,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
throws IOException { throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, bucketCountThresholds, filter, aggregationContext,
parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
} }
@ -111,7 +111,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException { throws IOException {
if (includeExclude != null || factories.count() > 0) { if (includeExclude != null || factories.count() > 0
// we need the FieldData impl to be able to extract the
// segment to global ord mapping
|| valuesSource.getClass() != ValuesSource.Bytes.FieldData.class) {
return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, bucketCountThresholds, includeExclude,
aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
} }

View File

@ -24,6 +24,7 @@ import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode;
import org.elasticsearch.search.aggregations.metrics.cardinality.Cardinality; import org.elasticsearch.search.aggregations.metrics.cardinality.Cardinality;
import org.elasticsearch.search.aggregations.metrics.geobounds.GeoBounds; import org.elasticsearch.search.aggregations.metrics.geobounds.GeoBounds;
import org.elasticsearch.search.aggregations.metrics.geocentroid.GeoCentroid; import org.elasticsearch.search.aggregations.metrics.geocentroid.GeoCentroid;
@ -68,18 +69,24 @@ public class MissingValueIT extends ESIntegTestCase {
} }
public void testStringTerms() { public void testStringTerms() {
SearchResponse response = client().prepareSearch("idx").addAggregation(terms("my_terms").field("str").missing("bar")).get(); for (ExecutionMode mode : ExecutionMode.values()) {
assertSearchResponse(response); SearchResponse response = client().prepareSearch("idx").addAggregation(
Terms terms = response.getAggregations().get("my_terms"); terms("my_terms")
assertEquals(2, terms.getBuckets().size()); .field("str")
assertEquals(1, terms.getBucketByKey("foo").getDocCount()); .executionHint(mode.toString())
assertEquals(1, terms.getBucketByKey("bar").getDocCount()); .missing("bar")).get();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("my_terms");
assertEquals(2, terms.getBuckets().size());
assertEquals(1, terms.getBucketByKey("foo").getDocCount());
assertEquals(1, terms.getBucketByKey("bar").getDocCount());
response = client().prepareSearch("idx").addAggregation(terms("my_terms").field("str").missing("foo")).get(); response = client().prepareSearch("idx").addAggregation(terms("my_terms").field("str").missing("foo")).get();
assertSearchResponse(response); assertSearchResponse(response);
terms = response.getAggregations().get("my_terms"); terms = response.getAggregations().get("my_terms");
assertEquals(1, terms.getBuckets().size()); assertEquals(1, terms.getBuckets().size());
assertEquals(2, terms.getBucketByKey("foo").getDocCount()); assertEquals(2, terms.getBucketByKey("foo").getDocCount());
}
} }
public void testLongTerms() { public void testLongTerms() {