Aggregations: Ordinals-based string bucketing support.

When the ValuesSource has ordinals, terms ordinals are used as keys into a
cache of bucket ordinals. This can make terms aggregations on string fields
significantly faster.
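
The idea in a rough, self-contained sketch (hypothetical names, not the classes this commit adds; a plain HashMap stands in for Lucene's BytesRefHash, and hard-coded arrays stand in for fielddata):

[source,java]
--------------------------------------------------
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch only: segment ordinals are dense integers, so a plain
// array can memoize the hash lookup that assigns bucket ordinals, and the
// hash is consulted only the first time each distinct term is seen.
public class OrdinalCacheSketch {
    public static void main(String[] args) {
        String[] termByOrd = { "apple", "banana", "cherry" };  // the segment's term dictionary
        int[][] ordsPerDoc = { { 0, 2 }, { 1 }, { 0, 1, 2 } }; // term ordinals of each document

        Map<String, Integer> bucketOrds = new HashMap<String, Integer>(); // slow path, stands in for BytesRefHash
        long[] ordinalToBucket = new long[termByOrd.length];              // fast path: the per-segment cache
        Arrays.fill(ordinalToBucket, -1L);                                // -1 means "ordinal not seen yet"
        long[] docCounts = new long[termByOrd.length];

        for (int[] ords : ordsPerDoc) {
            for (int ord : ords) {
                long bucketOrd = ordinalToBucket[ord];
                if (bucketOrd < 0) { // hash lookup happens once per distinct term
                    Integer existing = bucketOrds.get(termByOrd[ord]);
                    bucketOrd = existing != null ? existing : bucketOrds.size();
                    bucketOrds.put(termByOrd[ord], (int) bucketOrd);
                    ordinalToBucket[ord] = bucketOrd;
                }
                docCounts[(int) bucketOrd]++; // every other access is a plain array read
            }
        }
        for (Map.Entry<String, Integer> e : bucketOrds.entrySet()) {
            System.out.println(e.getKey() + ": " + docCounts[e.getValue()]);
        }
    }
}
--------------------------------------------------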

Close #4350
Adrien Grand 2013-11-27 14:55:54 +01:00
parent cc61348013
commit 36bd9cc432
15 changed files with 342 additions and 21 deletions

View File

@ -236,4 +236,31 @@ http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]

==== Execution hint
There are two mechanisms by which terms aggregations can be executed: either by using field values directly in order to aggregate
data per bucket (`map`), or by using ordinals of the field values instead of the values themselves (`ordinals`). Although the
latter execution mode can be expected to be slightly faster, it is only available when the underlying data source exposes
those terms ordinals. Moreover, it may actually be slower if most field values are unique. Elasticsearch tries to pick a
sensible default execution mode, but if you know that one execution mode will perform better than the other, you can
provide Elasticsearch with a hint:
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tags",
                "execution_hint": "map" <1>
            }
        }
    }
}
--------------------------------------------------
<1> the possible values are `map` and `ordinals`
Please note that Elasticsearch will ignore this execution hint if it is not applicable.
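
The hint can also be set through the Java API via the `executionHint` setter that this commit adds to `TermsBuilder`. A minimal usage sketch, assuming an existing `Client` instance named `client`:

[source,java]
--------------------------------------------------
// Minimal sketch; `client` is an assumed, already-configured Client instance.
SearchResponse response = client.prepareSearch("idx")
        .addAggregation(AggregationBuilders.terms("tags")
                .field("tags")
                .executionHint("map")) // or "ordinals"; ignored when not applicable
        .execute().actionGet();
--------------------------------------------------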

View File

@ -131,6 +131,13 @@ public enum BigArrays {
            assert indexIsInt(index);
            return array[(int) index] += inc;
        }

        @Override
        public void fill(long fromIndex, long toIndex, long value) {
            assert indexIsInt(fromIndex);
            assert indexIsInt(toIndex);
            Arrays.fill(array, (int) fromIndex, (int) toIndex, value);
        }
    }

    private static class DoubleArrayWrapper implements DoubleArray {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.common.util;
import com.google.common.base.Preconditions;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -92,4 +93,20 @@ final class BigLongArray extends AbstractBigArray implements LongArray {
        this.size = newSize;
    }

    @Override
    public void fill(long fromIndex, long toIndex, long value) {
        Preconditions.checkArgument(fromIndex <= toIndex);
        if (fromIndex == toIndex) {
            return; // empty range: toIndex - 1 below must not underflow
        }
        final int fromPage = pageIndex(fromIndex);
        final int toPage = pageIndex(toIndex - 1); // toIndex is exclusive
        if (fromPage == toPage) {
            Arrays.fill(pages[fromPage], indexInPage(fromIndex), indexInPage(toIndex - 1) + 1, value);
        } else {
            Arrays.fill(pages[fromPage], indexInPage(fromIndex), pages[fromPage].length, value);
            for (int i = fromPage + 1; i < toPage; ++i) {
                Arrays.fill(pages[i], value);
            }
            Arrays.fill(pages[toPage], 0, indexInPage(toIndex - 1) + 1, value);
        }
    }
}

View File

@ -39,4 +39,9 @@ public interface LongArray extends BigArray {
     */
    public abstract long increment(long index, long inc);

    /**
     * Fill slots from <code>fromIndex</code> (inclusive) to <code>toIndex</code> (exclusive) with <code>value</code>.
     */
    public abstract void fill(long fromIndex, long toIndex, long value);
}
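
For illustration, a usage sketch of the new method, consistent with the test added later in this commit (semantics mirror `java.util.Arrays.fill`):

[source,java]
--------------------------------------------------
// Usage sketch of the new fill method on a paged big array.
LongArray ords = BigArrays.newLongArray(1000); // may span several pages internally
ords.fill(0, ords.size(), -1L);                // slots [0, 1000) all become -1
ords.fill(10, 20, 42L);                        // slots 10..19 become 42, slot 20 stays -1
assert ords.get(9) == -1L && ords.get(10) == 42L && ords.get(20) == -1L;
--------------------------------------------------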

View File

@ -19,9 +19,14 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.elasticsearch.common.lucene.ReaderContextAware;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
@ -29,6 +34,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriority
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
import java.io.IOException;
import java.util.Arrays;
@ -37,7 +43,6 @@ import java.util.Collections;
/**
 * An aggregator of string values.
 */
// TODO we need a similar aggregator that would use ords, similarly to TermsStringOrdinalsFacetExecutor
public class StringTermsAggregator extends BucketsAggregator {

    private static final int INITIAL_CAPACITY = 50; // TODO sizing
@ -46,7 +51,7 @@ public class StringTermsAggregator extends BucketsAggregator {
    private final InternalOrder order;
    private final int requiredSize;
    private final int shardSize;
    private final BytesRefHash bucketOrds;
    protected final BytesRefHash bucketOrds;
    private final IncludeExclude includeExclude;

    public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
@ -130,5 +135,56 @@ public class StringTermsAggregator extends BucketsAggregator {
        return new StringTerms(name, order, requiredSize, Collections.<InternalTerms.Bucket>emptyList());
    }
    /**
     * Extension of StringTermsAggregator that caches bucket ords using terms ordinals.
     */
    public static class WithOrdinals extends StringTermsAggregator implements ReaderContextAware {

        private final BytesValuesSource.WithOrdinals valuesSource;
        private BytesValues.WithOrdinals bytesValues;
        private Ordinals.Docs ordinals;
        private LongArray ordinalToBucket;

        public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, InternalOrder order, int requiredSize,
                int shardSize, AggregationContext aggregationContext, Aggregator parent) {
            super(name, factories, valuesSource, order, requiredSize, shardSize, null, aggregationContext, parent);
            this.valuesSource = valuesSource;
        }

        @Override
        public void setNextReader(AtomicReaderContext reader) {
            bytesValues = valuesSource.bytesValues();
            ordinals = bytesValues.ordinals();
            final long maxOrd = ordinals.getMaxOrd();
            if (ordinalToBucket == null || ordinalToBucket.size() < maxOrd) {
                ordinalToBucket = BigArrays.newLongArray(BigArrays.overSize(maxOrd));
            }
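            // ordinals are segment-relative, so entries cached for the previous segment are stale and must be cleared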
            ordinalToBucket.fill(0, maxOrd, -1L);
        }

        @Override
        public void collect(int doc, long owningBucketOrdinal) throws IOException {
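            // the factory only picks this aggregator when there is no per-bucket parent aggregator, hence a single owning bucket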
            assert owningBucketOrdinal == 0;
            final int valuesCount = ordinals.setDocument(doc);
            for (int i = 0; i < valuesCount; ++i) {
                final long ord = ordinals.nextOrd();
                long bucketOrd = ordinalToBucket.get(ord);
                if (bucketOrd < 0) { // unlikely condition on a low-cardinality field
                    final BytesRef bytes = bytesValues.getValueByOrd(ord);
                    final int hash = bytesValues.currentValueHash();
                    assert hash == bytes.hashCode();
                    bucketOrd = bucketOrds.add(bytes, hash);
                    if (bucketOrd < 0) { // already seen in another segment
                        bucketOrd = -1 - bucketOrd;
                    }
                    ordinalToBucket.set(ord, bucketOrd);
                }
                collectBucket(doc, bucketOrd);
            }
        }
    }
}

View File

@ -19,8 +19,10 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.Aggregator.BucketAggregationMode;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory;
@ -34,17 +36,22 @@ import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource
*/
public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
    public static final String EXECUTION_HINT_VALUE_MAP = "map";
    public static final String EXECUTION_HINT_VALUE_ORDINALS = "ordinals";

    private final InternalOrder order;
    private final int requiredSize;
    private final int shardSize;
    private final IncludeExclude includeExclude;
    private final String executionHint;

    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude) {
    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) {
        super(name, StringTerms.TYPE.name(), valueSourceConfig);
        this.order = order;
        this.requiredSize = requiredSize;
        this.shardSize = shardSize;
        this.includeExclude = includeExclude;
        this.executionHint = executionHint;
    }
    @Override
@ -52,10 +59,46 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
        return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent);
    }
    private static boolean hasParentBucketAggregator(Aggregator parent) {
        if (parent == null) {
            return false;
        } else if (parent.bucketAggregationMode() == BucketAggregationMode.PER_BUCKET) {
            return true;
        } else {
            return hasParentBucketAggregator(parent.parent());
        }
    }
    @Override
    protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
        if (valuesSource instanceof BytesValuesSource) {
            return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
            if (executionHint != null && !executionHint.equals(EXECUTION_HINT_VALUE_MAP) && !executionHint.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
                throw new ElasticSearchIllegalArgumentException("execution_hint can only be '" + EXECUTION_HINT_VALUE_MAP + "' or '" + EXECUTION_HINT_VALUE_ORDINALS + "', not " + executionHint);
            }
            String execution = executionHint;
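            // fall back to map when the values source does not expose ordinals or when include/exclude is used,
            // which the ordinals-based aggregator does not support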
            if (!(valuesSource instanceof BytesValuesSource.WithOrdinals)) {
                execution = EXECUTION_HINT_VALUE_MAP;
            } else if (includeExclude != null) {
                execution = EXECUTION_HINT_VALUE_MAP;
            }
            if (execution == null) {
                if ((valuesSource instanceof BytesValuesSource.WithOrdinals)
                        && !hasParentBucketAggregator(parent)) {
                    execution = EXECUTION_HINT_VALUE_ORDINALS;
                } else {
                    execution = EXECUTION_HINT_VALUE_MAP;
                }
            }
            assert execution != null;

            if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
                assert includeExclude == null;
                final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name, factories, (BytesValuesSource.WithOrdinals) valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
                aggregationContext.registerReaderContextAware(aggregator);
                return aggregator;
            } else {
                return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
            }
        }

        if (includeExclude != null) {

View File

@ -20,6 +20,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
    private int includeFlags;
    private String excludePattern;
    private int excludeFlags;
    private String executionHint;

    public TermsBuilder(String name) {
        super(name, "terms");
@ -102,6 +103,11 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
        return this;
    }

    public TermsBuilder executionHint(String executionHint) {
        this.executionHint = executionHint;
        return this;
    }
    @Override
    protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException {
        if (size >= 0) {
@ -137,6 +143,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
                        .endObject();
            }
        }

        if (executionHint != null) {
            builder.field("execution_hint", executionHint);
        }
        return builder;
    }
}

View File

@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser {
        int includeFlags = 0; // 0 means no flags
        String exclude = null;
        int excludeFlags = 0; // 0 means no flags
        String executionHint = null;

        XContentParser.Token token;
@ -94,6 +95,8 @@ public class TermsParser implements Aggregator.Parser {
                    include = parser.text();
                } else if ("exclude".equals(currentFieldName)) {
                    exclude = parser.text();
                } else if ("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) {
                    executionHint = parser.text();
                }
            } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
                if ("script_values_unique".equals(currentFieldName)) {
@ -192,14 +195,14 @@ public class TermsParser implements Aggregator.Parser {
            if (!assumeUnique) {
                config.ensureUnique(true);
            }
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
        }
        FieldMapper<?> mapper = context.smartNameFieldMapper(field);
        if (mapper == null) {
            ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
            config.unmapped(true);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
        }

        IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
@ -241,7 +244,7 @@ public class TermsParser implements Aggregator.Parser {
            config.ensureUnique(true);
        }
        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
    }

    static InternalOrder resolveOrder(String key, boolean asc) {

View File

@ -27,9 +27,11 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.common.lucene.ReaderContextAware;
import org.elasticsearch.common.lucene.ScorerAware;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.support.FieldDataSource.Uniqueness;
import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
import org.elasticsearch.search.aggregations.support.geopoints.GeoPointValuesSource;
import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
@ -160,10 +162,15 @@ public class AggregationContext implements ReaderContextAware, ScorerAware {
        return new NumericValuesSource(dataSource, config.formatter(), config.parser());
    }
    private BytesValuesSource bytesField(ObjectObjectOpenHashMap<String, FieldDataSource> fieldDataSources, ValuesSourceConfig<?> config) {
    private ValuesSource bytesField(ObjectObjectOpenHashMap<String, FieldDataSource> fieldDataSources, ValuesSourceConfig<?> config) {
        FieldDataSource dataSource = fieldDataSources.get(config.fieldContext.field());
        if (dataSource == null) {
            dataSource = new FieldDataSource.Bytes.FieldData(config.fieldContext.indexFieldData());
            final IndexFieldData<?> indexFieldData = config.fieldContext.indexFieldData();
            if (indexFieldData instanceof IndexFieldData.WithOrdinals<?>) {
                dataSource = new FieldDataSource.Bytes.WithOrdinals.FieldData((IndexFieldData.WithOrdinals<?>) indexFieldData);
            } else {
                dataSource = new FieldDataSource.Bytes.FieldData(indexFieldData);
            }
            setReaderIfNeeded((ReaderContextAware) dataSource);
            readerAwares.add((ReaderContextAware) dataSource);
            fieldDataSources.put(config.fieldContext.field(), dataSource);
@ -178,14 +185,19 @@ public class AggregationContext implements ReaderContextAware, ScorerAware {
        // Even in case we wrap field data, we might still need to wrap for sorting, because the wrapped field data might be
        // eg. a numeric field data that doesn't sort according to the byte order. However field data values are unique so no
        // need to wrap for uniqueness
        if ((config.ensureUnique && !(dataSource instanceof FieldDataSource.Bytes.FieldData)) || config.ensureSorted) {
        if ((config.ensureUnique && dataSource.getUniqueness() != Uniqueness.UNIQUE) || config.ensureSorted) {
            dataSource = new FieldDataSource.Bytes.SortedAndUnique(dataSource);
            readerAwares.add((ReaderContextAware) dataSource);
        }
        if (config.needsHashes) { // the data source needs hashes if at least one consumer needs hashes
            dataSource.setNeedsHashes(true);
        }
        return new BytesValuesSource(dataSource);
        if (dataSource instanceof FieldDataSource.Bytes.WithOrdinals) {
            return new BytesValuesSource.WithOrdinals((FieldDataSource.Bytes.WithOrdinals) dataSource);
        } else {
            return new BytesValuesSource(dataSource);
        }
    }

    private BytesValuesSource bytesScript(ValuesSourceConfig<?> config) {

View File

@ -58,6 +58,51 @@ public abstract class FieldDataSource {
    public static abstract class Bytes extends FieldDataSource {

        public static abstract class WithOrdinals extends Bytes {

            public abstract BytesValues.WithOrdinals bytesValues();

            public static class FieldData extends WithOrdinals implements ReaderContextAware {

                protected boolean needsHashes;
                protected final IndexFieldData.WithOrdinals<?> indexFieldData;
                protected AtomicFieldData.WithOrdinals<?> atomicFieldData;
                private BytesValues.WithOrdinals bytesValues;

                public FieldData(IndexFieldData.WithOrdinals<?> indexFieldData) {
                    this.indexFieldData = indexFieldData;
                    needsHashes = false;
                }

                @Override
                public Uniqueness getUniqueness() {
                    return Uniqueness.UNIQUE;
                }

                public final void setNeedsHashes(boolean needsHashes) {
                    this.needsHashes = needsHashes;
                }

                @Override
                public void setNextReader(AtomicReaderContext reader) {
                    atomicFieldData = indexFieldData.load(reader);
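                    // stay lazy: only refresh values that were already materialized for a previous segment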
                    if (bytesValues != null) {
                        bytesValues = atomicFieldData.getBytesValues(needsHashes);
                    }
                }

                @Override
                public BytesValues.WithOrdinals bytesValues() {
                    if (bytesValues == null) {
                        bytesValues = atomicFieldData.getBytesValues(needsHashes);
                    }
                    return bytesValues;
                }
            }
        }

        public static class FieldData extends Bytes implements ReaderContextAware {

            protected boolean needsHashes;

View File

@ -26,7 +26,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSource;
/**
 *
 */
public final class BytesValuesSource implements ValuesSource {
public class BytesValuesSource implements ValuesSource {
    private final FieldDataSource source;
@ -39,4 +39,20 @@ public final class BytesValuesSource implements ValuesSource {
        return source.bytesValues();
    }

    public static final class WithOrdinals extends BytesValuesSource {

        private final FieldDataSource.Bytes.WithOrdinals source;

        public WithOrdinals(FieldDataSource.Bytes.WithOrdinals source) {
            super(source);
            this.source = source;
        }

        @Override
        public BytesValues.WithOrdinals bytesValues() {
            return source.bytesValues();
        }
    }
}

View File

@ -81,7 +81,7 @@ public class TermsAggregationSearchBenchmark {
        AGGREGATION {
            @Override
            SearchRequestBuilder addTermsAgg(SearchRequestBuilder builder, String name, String field, String executionHint) {
                return builder.addAggregation(AggregationBuilders.terms(name).field(field));
                return builder.addAggregation(AggregationBuilders.terms(name).executionHint(executionHint).field(field));
            }

            @Override
@ -234,6 +234,8 @@ public class TermsAggregationSearchBenchmark {
stats.add(terms("terms_facet_map_s_dv", Method.FACET, "s_value_dv", "map"));
stats.add(terms("terms_agg_s", Method.AGGREGATION, "s_value", null));
stats.add(terms("terms_agg_s_dv", Method.AGGREGATION, "s_value_dv", null));
stats.add(terms("terms_agg_map_s", Method.AGGREGATION, "s_value", "map"));
stats.add(terms("terms_agg_map_s_dv", Method.AGGREGATION, "s_value_dv", "map"));
stats.add(terms("terms_facet_l", Method.FACET, "l_value", null));
stats.add(terms("terms_facet_l_dv", Method.FACET, "l_value_dv", null));
stats.add(terms("terms_agg_l", Method.AGGREGATION, "l_value", null));
@ -244,6 +246,8 @@ public class TermsAggregationSearchBenchmark {
stats.add(terms("terms_facet_map_sm_dv", Method.FACET, "sm_value_dv", "map"));
stats.add(terms("terms_agg_sm", Method.AGGREGATION, "sm_value", null));
stats.add(terms("terms_agg_sm_dv", Method.AGGREGATION, "sm_value_dv", null));
stats.add(terms("terms_agg_map_sm", Method.AGGREGATION, "sm_value", "map"));
stats.add(terms("terms_agg_map_sm_dv", Method.AGGREGATION, "sm_value_dv", "map"));
stats.add(terms("terms_facet_lm", Method.FACET, "lm_value", null));
stats.add(terms("terms_facet_lm_dv", Method.FACET, "lm_value_dv", null));
stats.add(terms("terms_agg_lm", Method.AGGREGATION, "lm_value", null));

View File

@ -109,4 +109,24 @@ public class BigArraysTests extends ElasticsearchTestCase {
        }
    }

    public void testLongArrayFill() {
        final int len = randomIntBetween(1, 100000);
        final int fromIndex = randomIntBetween(0, len - 1);
        final int toIndex = randomBoolean()
                ? Math.min(fromIndex + randomInt(100), len) // single page
                : randomIntBetween(fromIndex, len); // likely multiple pages
        final LongArray array2 = BigArrays.newLongArray(len);
        final long[] array1 = new long[len];
        for (int i = 0; i < len; ++i) {
            array1[i] = randomLong();
            array2.set(i, array1[i]);
        }
        final long rand = randomLong();
        Arrays.fill(array1, fromIndex, toIndex, rand);
        array2.fill(fromIndex, toIndex, rand);
        for (int i = 0; i < len; ++i) {
            assertEquals(array1[i], array2.get(i));
        }
    }
}

View File

@ -28,11 +28,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.RangeFilterBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.range.Range;
import org.elasticsearch.search.aggregations.bucket.range.RangeBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@ -194,23 +195,29 @@ public class RandomTests extends ElasticsearchIntegrationTest {
SearchResponse resp = client().prepareSearch("idx")
.addAggregation(terms("long").field("long_values").size(maxNumTerms).subAggregation(min("min").field("num")))
.addAggregation(terms("double").field("double_values").size(maxNumTerms).subAggregation(max("max").field("num")))
.addAggregation(terms("string").field("string_values").size(maxNumTerms).subAggregation(stats("stats").field("num"))).execute().actionGet();
.addAggregation(terms("string_map").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP).size(maxNumTerms).subAggregation(stats("stats").field("num")))
.addAggregation(terms("string_ordinals").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS).size(maxNumTerms).subAggregation(extendedStats("stats").field("num"))).execute().actionGet();
assertEquals(0, resp.getFailedShards());
final Terms longTerms = resp.getAggregations().get("long");
final Terms doubleTerms = resp.getAggregations().get("double");
final Terms stringTerms = resp.getAggregations().get("string");
final Terms stringMapTerms = resp.getAggregations().get("string_map");
final Terms stringOrdinalsTerms = resp.getAggregations().get("string_ordinals");
assertEquals(valuesSet.size(), longTerms.buckets().size());
assertEquals(valuesSet.size(), doubleTerms.buckets().size());
assertEquals(valuesSet.size(), stringTerms.buckets().size());
assertEquals(valuesSet.size(), stringMapTerms.buckets().size());
assertEquals(valuesSet.size(), stringOrdinalsTerms.buckets().size());
for (Terms.Bucket bucket : longTerms.buckets()) {
final Terms.Bucket doubleBucket = doubleTerms.getByTerm(Double.toString(Long.parseLong(bucket.getKey().string())));
final Terms.Bucket stringBucket = stringTerms.getByTerm(bucket.getKey().string());
final Terms.Bucket stringMapBucket = stringMapTerms.getByTerm(bucket.getKey().string());
final Terms.Bucket stringOrdinalsBucket = stringOrdinalsTerms.getByTerm(bucket.getKey().string());
assertNotNull(doubleBucket);
assertNotNull(stringBucket);
assertNotNull(stringMapBucket);
assertNotNull(stringOrdinalsBucket);
assertEquals(bucket.getDocCount(), doubleBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount());
assertEquals(bucket.getDocCount(), stringOrdinalsBucket.getDocCount());
}
}

View File

@ -24,8 +24,10 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.hamcrest.Matchers;
@ -33,10 +35,12 @@ import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
@ -58,6 +62,10 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                .build();
    }

    private String randomExecutionHint() {
        return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS));
    }
    @Before
    public void init() throws Exception {
        createIndex("idx");
@ -88,6 +96,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValueField() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value"))
                .execute().actionGet();
@ -254,6 +263,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValueField_WithMaxSize() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .size(20)
                        .order(Terms.Order.TERM_ASC)) // we need to sort by terms because we're checking the first 20 values
@ -278,6 +288,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValueField_OrderedByTermAsc() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .order(Terms.Order.TERM_ASC))
                .execute().actionGet();
@ -302,6 +313,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValueField_OrderedByTermDesc() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .order(Terms.Order.TERM_DESC))
                .execute().actionGet();
@ -326,6 +338,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValuedField_WithSubAggregation() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .subAggregation(count("count").field("values")))
                .execute().actionGet();
@ -352,6 +365,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValuedField_WithSubAggregation_Inherited() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .subAggregation(count("count")))
                .execute().actionGet();
@ -378,6 +392,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void singleValuedField_WithValueScript() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value")
                        .script("'foo_' + _value"))
                .execute().actionGet();
@ -401,6 +416,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void multiValuedField_WithValueScript_NotUnique() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("values")
                        .script("_value.substring(0,3)"))
                .execute().actionGet();
@ -422,6 +438,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void multiValuedField() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("values"))
                .execute().actionGet();
@ -448,6 +465,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void multiValuedField_WithValueScript() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("values")
                        .script("'foo_' + _value"))
                .execute().actionGet();
@ -493,6 +511,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void multiValuedField_WithValueScript_WithInheritedSubAggregator() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("values")
                        .script("'foo_' + _value")
                        .subAggregation(count("count")))
@ -527,6 +546,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void script_SingleValue() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .script("doc['value'].value"))
                .execute().actionGet();
@ -549,6 +569,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void script_SingleValue_ExplicitSingleValue() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .script("doc['value'].value"))
                .execute().actionGet();
@ -571,6 +592,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void script_SingleValue_WithSubAggregator_Inherited() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .script("doc['value'].value")
                        .subAggregation(count("count")))
                .execute().actionGet();
@ -597,6 +619,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void script_MultiValued() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .script("doc['values'].values"))
                .execute().actionGet();
@ -623,6 +646,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void script_MultiValued_WithAggregatorInherited() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .script("doc['values'].values")
                        .subAggregation(count("count")))
                .execute().actionGet();
@ -656,6 +680,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void unmapped() throws Exception {
        SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value"))
                .execute().actionGet();
@ -671,6 +696,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
    public void partiallyUnmapped() throws Exception {
        SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type")
                .addAggregation(terms("terms")
                        .executionHint(randomExecutionHint())
                        .field("value"))
                .execute().actionGet();
@ -689,6 +715,30 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
        }
    }

    @Test
    public void stringTermsNestedIntoPerBucketAggregator() throws Exception {
        // no execution hint so that the logic that decides whether or not to use ordinals is executed
        SearchResponse response = client().prepareSearch("idx").setTypes("type")
                .addAggregation(filter("filter").filter(termFilter("values", "val3")).subAggregation(terms("terms").field("values")))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        Filter filter = response.getAggregations().get("filter");
        Terms terms = filter.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(3));

        for (int i = 2; i <= 4; i++) {
            Terms.Bucket bucket = terms.getByTerm("val" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val" + i));
            assertThat(bucket.getDocCount(), equalTo(i == 3 ? 2L : 1L));
        }
    }

    @Test
    public void emptyAggregation() throws Exception {
        prepareCreate("empty_bucket_idx").addMapping("type", "value", "type=integer").execute().actionGet();