diff --git a/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc index 5427d6c95c9..35cfeb52d42 100644 --- a/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc +++ b/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc @@ -236,4 +236,31 @@ http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`L http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`], http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`], http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and -http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`] \ No newline at end of file +http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`] + +==== Execution hint + +There are two mechanisms by which terms aggregations can be executed: either by using field values directly in order to aggregate +data per-bucket (`map`), or by using ordinals of the field values instead of the values themselves (`ordinals`). Although the +latter execution mode can be expected to be slightly faster, it is only available for use when the underlying data source exposes +term ordinals. Moreover, it may actually be slower if most field values are unique. 
Elasticsearch tries to have sensible +defaults when it comes to the execution mode that should be used, but in case you know that an execution mode may perform better +than the other one, you have the ability to provide Elasticsearch with a hint: + +[source,js] +-------------------------------------------------- +{ + "aggs" : { + "tags" : { + "terms" : { + "field" : "tags", + "execution_hint": "map" <1> + } + } + } +} +-------------------------------------------------- + +<1> the possible values are `map` and `ordinals` + +Please note that Elasticsearch will ignore this execution hint if it is not applicable. diff --git a/src/main/java/org/elasticsearch/common/util/BigArrays.java b/src/main/java/org/elasticsearch/common/util/BigArrays.java index 9fef20bd15b..4dc6d98869c 100644 --- a/src/main/java/org/elasticsearch/common/util/BigArrays.java +++ b/src/main/java/org/elasticsearch/common/util/BigArrays.java @@ -131,6 +131,13 @@ public enum BigArrays { assert indexIsInt(index); return array[(int) index] += inc; } + + @Override + public void fill(long fromIndex, long toIndex, long value) { + assert indexIsInt(fromIndex); + assert indexIsInt(toIndex); + Arrays.fill(array, (int) fromIndex, (int) toIndex, value); + } } private static class DoubleArrayWrapper implements DoubleArray { diff --git a/src/main/java/org/elasticsearch/common/util/BigLongArray.java b/src/main/java/org/elasticsearch/common/util/BigLongArray.java index 3faf67e7a1a..0e1b882fe25 100644 --- a/src/main/java/org/elasticsearch/common/util/BigLongArray.java +++ b/src/main/java/org/elasticsearch/common/util/BigLongArray.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.util; +import com.google.common.base.Preconditions; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -92,4 +93,24 @@ final class BigLongArray extends AbstractBigArray implements LongArray { this.size = newSize; } + @Override + public void fill(long fromIndex, long toIndex, long value) { + 
Preconditions.checkArgument(fromIndex <= toIndex); + // An empty range must be a no-op: toIndex - 1 would otherwise land on the page before fromPage (or on index -1) and the multi-page branch would overwrite slots outside the range. + if (fromIndex == toIndex) { + return; + } + final int fromPage = pageIndex(fromIndex); + final int toPage = pageIndex(toIndex - 1); + if (fromPage == toPage) { + Arrays.fill(pages[fromPage], indexInPage(fromIndex), indexInPage(toIndex - 1) + 1, value); + } else { + Arrays.fill(pages[fromPage], indexInPage(fromIndex), pages[fromPage].length, value); + for (int i = fromPage + 1; i < toPage; ++i) { + Arrays.fill(pages[i], value); + } + Arrays.fill(pages[toPage], 0, indexInPage(toIndex - 1) + 1, value); + } + } + } diff --git a/src/main/java/org/elasticsearch/common/util/LongArray.java b/src/main/java/org/elasticsearch/common/util/LongArray.java index b00058ecf8c..d986cb7534d 100644 --- a/src/main/java/org/elasticsearch/common/util/LongArray.java +++ b/src/main/java/org/elasticsearch/common/util/LongArray.java @@ -39,4 +39,9 @@ public interface LongArray extends BigArray { */ public abstract long increment(long index, long inc); + /** + * Fill slots from <code>fromIndex</code> (inclusive) to <code>toIndex</code> (exclusive) with <code>value</code>; an empty range (fromIndex == toIndex) leaves the array untouched. 
+ */ + public abstract void fill(long fromIndex, long toIndex, long value); + } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java index 2b6f21fc034..b8ceece6732 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java @@ -19,9 +19,14 @@ package org.elasticsearch.search.aggregations.bucket.terms; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.elasticsearch.common.lucene.ReaderContextAware; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.fielddata.BytesValues; +import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; @@ -29,6 +34,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriority import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource; import java.io.IOException; import java.util.Arrays; @@ -37,7 +43,6 @@ import java.util.Collections; /** * An aggregator of string values. 
*/ -// TODO we need a similar aggregator that would use ords, similarly to TermsStringOrdinalsFacetExecutor public class StringTermsAggregator extends BucketsAggregator { private static final int INITIAL_CAPACITY = 50; // TODO sizing @@ -46,7 +51,7 @@ public class StringTermsAggregator extends BucketsAggregator { private final InternalOrder order; private final int requiredSize; private final int shardSize; - private final BytesRefHash bucketOrds; + protected final BytesRefHash bucketOrds; private final IncludeExclude includeExclude; public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, @@ -130,5 +135,56 @@ public class StringTermsAggregator extends BucketsAggregator { return new StringTerms(name, order, requiredSize, Collections.emptyList()); } + /** + * Extension of StringTermsAggregator that caches bucket ords using terms ordinals. + */ + public static class WithOrdinals extends StringTermsAggregator implements ReaderContextAware { + + private final BytesValuesSource.WithOrdinals valuesSource; + private BytesValues.WithOrdinals bytesValues; + private Ordinals.Docs ordinals; + private LongArray ordinalToBucket; + + public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, InternalOrder order, int requiredSize, + int shardSize, AggregationContext aggregationContext, Aggregator parent) { + super(name, factories, valuesSource, order, requiredSize, shardSize, null, aggregationContext, parent); + this.valuesSource = valuesSource; + } + + @Override + public void setNextReader(AtomicReaderContext reader) { + bytesValues = valuesSource.bytesValues(); + ordinals = bytesValues.ordinals(); + final long maxOrd = ordinals.getMaxOrd(); + if (ordinalToBucket == null || ordinalToBucket.size() < maxOrd) { + ordinalToBucket = BigArrays.newLongArray(BigArrays.overSize(maxOrd)); + } + ordinalToBucket.fill(0, maxOrd, -1L); + } + + @Override + public void collect(int doc, long 
owningBucketOrdinal) throws IOException { + assert owningBucketOrdinal == 0; + final int valuesCount = ordinals.setDocument(doc); + + for (int i = 0; i < valuesCount; ++i) { + final long ord = ordinals.nextOrd(); + long bucketOrd = ordinalToBucket.get(ord); + if (bucketOrd < 0) { // unlikely condition on a low-cardinality field + final BytesRef bytes = bytesValues.getValueByOrd(ord); + final int hash = bytesValues.currentValueHash(); + assert hash == bytes.hashCode(); + bucketOrd = bucketOrds.add(bytes, hash); + if (bucketOrd < 0) { // already seen in another segment + bucketOrd = - 1 - bucketOrd; + } + ordinalToBucket.set(ord, bucketOrd); + } + + collectBucket(doc, bucketOrd); + } + } + } + } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 5090ca4e415..246c07d29a5 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -19,8 +19,10 @@ package org.elasticsearch.search.aggregations.bucket.terms; +import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.Aggregator.BucketAggregationMode; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory; @@ -34,17 +36,22 @@ import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource */ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory { + public static final String EXECUTION_HINT_VALUE_MAP = "map"; + public static final 
String EXECUTION_HINT_VALUE_ORDINALS = "ordinals"; + private final InternalOrder order; private final int requiredSize; private final int shardSize; private final IncludeExclude includeExclude; + private final String executionHint; - public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude) { + public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) { super(name, StringTerms.TYPE.name(), valueSourceConfig); this.order = order; this.requiredSize = requiredSize; this.shardSize = shardSize; this.includeExclude = includeExclude; + this.executionHint = executionHint; } @Override @@ -52,10 +59,46 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory { return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent); } + private static boolean hasParentBucketAggregator(Aggregator parent) { + if (parent == null) { + return false; + } else if (parent.bucketAggregationMode() == BucketAggregationMode.PER_BUCKET) { + return true; + } else { + return hasParentBucketAggregator(parent.parent()); + } + } + @Override protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) { if (valuesSource instanceof BytesValuesSource) { - return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent); + if (executionHint != null && !executionHint.equals(EXECUTION_HINT_VALUE_MAP) && !executionHint.equals(EXECUTION_HINT_VALUE_ORDINALS)) { + throw new ElasticSearchIllegalArgumentException("execution_hint can only be '" + EXECUTION_HINT_VALUE_MAP + "' or '" + EXECUTION_HINT_VALUE_ORDINALS + "', not " + executionHint); + } + String execution = executionHint; + 
if (!(valuesSource instanceof BytesValuesSource.WithOrdinals)) { + execution = EXECUTION_HINT_VALUE_MAP; + } else if (includeExclude != null) { + execution = EXECUTION_HINT_VALUE_MAP; + } + if (execution == null) { + if ((valuesSource instanceof BytesValuesSource.WithOrdinals) + && !hasParentBucketAggregator(parent)) { + execution = EXECUTION_HINT_VALUE_ORDINALS; + } else { + execution = EXECUTION_HINT_VALUE_MAP; + } + } + assert execution != null; + + if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) { + assert includeExclude == null; + final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name, factories, (BytesValuesSource.WithOrdinals) valuesSource, order, requiredSize, shardSize, aggregationContext, parent); + aggregationContext.registerReaderContextAware(aggregator); + return aggregator; + } else { + return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent); + } } if (includeExclude != null) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java index 44396de3560..4a1bce95a24 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java @@ -20,6 +20,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { private int includeFlags; private String excludePattern; private int excludeFlags; + private String executionHint; public TermsBuilder(String name) { super(name, "terms"); @@ -102,6 +103,11 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { return this; } + public TermsBuilder executionHint(String executionHint) { + this.executionHint = executionHint; + return this; + } + @Override protected XContentBuilder doInternalXContent(XContentBuilder builder, Params 
params) throws IOException { if (size >=0) { @@ -137,6 +143,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { .endObject(); } } + if (executionHint != null) { + builder.field("execution_hint", executionHint); + } return builder; } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index 4ea1085aed6..477811d4f09 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser { int includeFlags = 0; // 0 means no flags String exclude = null; int excludeFlags = 0; // 0 means no flags + String executionHint = null; XContentParser.Token token; @@ -94,6 +95,8 @@ public class TermsParser implements Aggregator.Parser { include = parser.text(); } else if ("exclude".equals(currentFieldName)) { exclude = parser.text(); + } else if ("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) { + executionHint = parser.text(); } } else if (token == XContentParser.Token.VALUE_BOOLEAN) { if ("script_values_unique".equals(currentFieldName)) { @@ -192,14 +195,14 @@ public class TermsParser implements Aggregator.Parser { if (!assumeUnique) { config.ensureUnique(true); } - return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude); + return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint); } FieldMapper mapper = context.smartNameFieldMapper(field); if (mapper == null) { ValuesSourceConfig config = new ValuesSourceConfig(BytesValuesSource.class); config.unmapped(true); - return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude); + return new 
TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint); } IndexFieldData indexFieldData = context.fieldData().getForField(mapper); @@ -241,7 +244,7 @@ public class TermsParser implements Aggregator.Parser { config.ensureUnique(true); } - return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude); + return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint); } static InternalOrder resolveOrder(String key, boolean asc) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java index 54947645c56..16b3093cda9 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java +++ b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java @@ -27,9 +27,11 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.lucene.ReaderContextAware; import org.elasticsearch.common.lucene.ScorerAware; +import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.search.aggregations.AggregationExecutionException; +import org.elasticsearch.search.aggregations.support.FieldDataSource.Uniqueness; import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource; import org.elasticsearch.search.aggregations.support.geopoints.GeoPointValuesSource; import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource; @@ -160,10 +162,15 @@ public class AggregationContext implements ReaderContextAware, ScorerAware { return new NumericValuesSource(dataSource, config.formatter(), config.parser()); } - 
private BytesValuesSource bytesField(ObjectObjectOpenHashMap fieldDataSources, ValuesSourceConfig config) { + private ValuesSource bytesField(ObjectObjectOpenHashMap fieldDataSources, ValuesSourceConfig config) { FieldDataSource dataSource = fieldDataSources.get(config.fieldContext.field()); if (dataSource == null) { - dataSource = new FieldDataSource.Bytes.FieldData(config.fieldContext.indexFieldData()); + final IndexFieldData indexFieldData = config.fieldContext.indexFieldData(); + if (indexFieldData instanceof IndexFieldData.WithOrdinals) { + dataSource = new FieldDataSource.Bytes.WithOrdinals.FieldData((IndexFieldData.WithOrdinals) indexFieldData); + } else { + dataSource = new FieldDataSource.Bytes.FieldData(indexFieldData); + } setReaderIfNeeded((ReaderContextAware) dataSource); readerAwares.add((ReaderContextAware) dataSource); fieldDataSources.put(config.fieldContext.field(), dataSource); @@ -178,14 +185,19 @@ public class AggregationContext implements ReaderContextAware, ScorerAware { // Even in case we wrap field data, we might still need to wrap for sorting, because the wrapped field data might be // eg. a numeric field data that doesn't sort according to the byte order. 
However field data values are unique so no // need to wrap for uniqueness - if ((config.ensureUnique && !(dataSource instanceof FieldDataSource.Bytes.FieldData)) || config.ensureSorted) { + if ((config.ensureUnique && dataSource.getUniqueness() != Uniqueness.UNIQUE) || config.ensureSorted) { dataSource = new FieldDataSource.Bytes.SortedAndUnique(dataSource); readerAwares.add((ReaderContextAware) dataSource); } + if (config.needsHashes) { // the data source needs hash if at least one consumer needs hashes dataSource.setNeedsHashes(true); } - return new BytesValuesSource(dataSource); + if (dataSource instanceof FieldDataSource.Bytes.WithOrdinals) { + return new BytesValuesSource.WithOrdinals((FieldDataSource.Bytes.WithOrdinals) dataSource); + } else { + return new BytesValuesSource(dataSource); + } } private BytesValuesSource bytesScript(ValuesSourceConfig config) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java index dc87f3922d2..ac7c121ea55 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java +++ b/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java @@ -58,6 +58,51 @@ public abstract class FieldDataSource { public static abstract class Bytes extends FieldDataSource { + public static abstract class WithOrdinals extends Bytes { + + public abstract BytesValues.WithOrdinals bytesValues(); + + public static class FieldData extends WithOrdinals implements ReaderContextAware { + + protected boolean needsHashes; + protected final IndexFieldData.WithOrdinals indexFieldData; + protected AtomicFieldData.WithOrdinals atomicFieldData; + private BytesValues.WithOrdinals bytesValues; + + public FieldData(IndexFieldData.WithOrdinals indexFieldData) { + this.indexFieldData = indexFieldData; + needsHashes = false; + } + + @Override + public Uniqueness getUniqueness() { + return 
Uniqueness.UNIQUE; + } + + public final void setNeedsHashes(boolean needsHashes) { + this.needsHashes = needsHashes; + } + + @Override + public void setNextReader(AtomicReaderContext reader) { + atomicFieldData = indexFieldData.load(reader); + if (bytesValues != null) { + bytesValues = atomicFieldData.getBytesValues(needsHashes); + } + } + + @Override + public BytesValues.WithOrdinals bytesValues() { + if (bytesValues == null) { + bytesValues = atomicFieldData.getBytesValues(needsHashes); + } + return bytesValues; + } + + } + + } + public static class FieldData extends Bytes implements ReaderContextAware { protected boolean needsHashes; diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java index ca159298019..7f47c703a5e 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java +++ b/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java @@ -26,7 +26,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSource; /** * */ -public final class BytesValuesSource implements ValuesSource { +public class BytesValuesSource implements ValuesSource { private final FieldDataSource source; @@ -39,4 +39,20 @@ public final class BytesValuesSource implements ValuesSource { return source.bytesValues(); } + public static final class WithOrdinals extends BytesValuesSource { + + private final FieldDataSource.Bytes.WithOrdinals source; + + public WithOrdinals(FieldDataSource.Bytes.WithOrdinals source) { + super(source); + this.source = source; + } + + @Override + public BytesValues.WithOrdinals bytesValues() { + return source.bytesValues(); + } + + } + } diff --git a/src/test/java/org/elasticsearch/benchmark/search/aggregations/TermsAggregationSearchBenchmark.java b/src/test/java/org/elasticsearch/benchmark/search/aggregations/TermsAggregationSearchBenchmark.java 
index 3b501711ea8..6e15f40b439 100644 --- a/src/test/java/org/elasticsearch/benchmark/search/aggregations/TermsAggregationSearchBenchmark.java +++ b/src/test/java/org/elasticsearch/benchmark/search/aggregations/TermsAggregationSearchBenchmark.java @@ -81,7 +81,7 @@ public class TermsAggregationSearchBenchmark { AGGREGATION { @Override SearchRequestBuilder addTermsAgg(SearchRequestBuilder builder, String name, String field, String executionHint) { - return builder.addAggregation(AggregationBuilders.terms(name).field(field)); + return builder.addAggregation(AggregationBuilders.terms(name).executionHint(executionHint).field(field)); } @Override @@ -234,6 +234,8 @@ public class TermsAggregationSearchBenchmark { stats.add(terms("terms_facet_map_s_dv", Method.FACET, "s_value_dv", "map")); stats.add(terms("terms_agg_s", Method.AGGREGATION, "s_value", null)); stats.add(terms("terms_agg_s_dv", Method.AGGREGATION, "s_value_dv", null)); + stats.add(terms("terms_agg_map_s", Method.AGGREGATION, "s_value", "map")); + stats.add(terms("terms_agg_map_s_dv", Method.AGGREGATION, "s_value_dv", "map")); stats.add(terms("terms_facet_l", Method.FACET, "l_value", null)); stats.add(terms("terms_facet_l_dv", Method.FACET, "l_value_dv", null)); stats.add(terms("terms_agg_l", Method.AGGREGATION, "l_value", null)); @@ -244,6 +246,8 @@ public class TermsAggregationSearchBenchmark { stats.add(terms("terms_facet_map_sm_dv", Method.FACET, "sm_value_dv", "map")); stats.add(terms("terms_agg_sm", Method.AGGREGATION, "sm_value", null)); stats.add(terms("terms_agg_sm_dv", Method.AGGREGATION, "sm_value_dv", null)); + stats.add(terms("terms_agg_map_sm", Method.AGGREGATION, "sm_value", "map")); + stats.add(terms("terms_agg_map_sm_dv", Method.AGGREGATION, "sm_value_dv", "map")); stats.add(terms("terms_facet_lm", Method.FACET, "lm_value", null)); stats.add(terms("terms_facet_lm_dv", Method.FACET, "lm_value_dv", null)); stats.add(terms("terms_agg_lm", Method.AGGREGATION, "lm_value", null)); diff --git 
a/src/test/java/org/elasticsearch/common/util/BigArraysTests.java b/src/test/java/org/elasticsearch/common/util/BigArraysTests.java index ba1825897a0..0517299703c 100644 --- a/src/test/java/org/elasticsearch/common/util/BigArraysTests.java +++ b/src/test/java/org/elasticsearch/common/util/BigArraysTests.java @@ -109,4 +109,24 @@ public class BigArraysTests extends ElasticsearchTestCase { } } + public void testLongArrayFill() { + final int len = randomIntBetween(1, 100000); + final int fromIndex = randomIntBetween(0, len - 1); + final int toIndex = randomBoolean() + ? Math.min(fromIndex + randomInt(100), len) // single page + : randomIntBetween(fromIndex, len); // likely multiple pages + final LongArray array2 = BigArrays.newLongArray(len); + final long[] array1 = new long[len]; + for (int i = 0; i < len; ++i) { + array1[i] = randomLong(); + array2.set(i, array1[i]); + } + final long rand = randomLong(); + Arrays.fill(array1, fromIndex, toIndex, rand); + array2.fill(fromIndex, toIndex, rand); + for (int i = 0; i < len; ++i) { + assertEquals(array1[i], array2.get(i)); + } + } + } diff --git a/src/test/java/org/elasticsearch/search/aggregations/RandomTests.java b/src/test/java/org/elasticsearch/search/aggregations/RandomTests.java index 8381f670b34..606c365f26b 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/RandomTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/RandomTests.java @@ -28,11 +28,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.FilterBuilders; import org.elasticsearch.index.query.RangeFilterBuilder; +import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.range.Range; import org.elasticsearch.search.aggregations.bucket.range.RangeBuilder; import 
org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.aggregations.bucket.filter.Filter; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.test.ElasticsearchIntegrationTest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -194,23 +195,29 @@ public class RandomTests extends ElasticsearchIntegrationTest { SearchResponse resp = client().prepareSearch("idx") .addAggregation(terms("long").field("long_values").size(maxNumTerms).subAggregation(min("min").field("num"))) .addAggregation(terms("double").field("double_values").size(maxNumTerms).subAggregation(max("max").field("num"))) - .addAggregation(terms("string").field("string_values").size(maxNumTerms).subAggregation(stats("stats").field("num"))).execute().actionGet(); + .addAggregation(terms("string_map").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP).size(maxNumTerms).subAggregation(stats("stats").field("num"))) + .addAggregation(terms("string_ordinals").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS).size(maxNumTerms).subAggregation(extendedStats("stats").field("num"))).execute().actionGet(); assertEquals(0, resp.getFailedShards()); final Terms longTerms = resp.getAggregations().get("long"); final Terms doubleTerms = resp.getAggregations().get("double"); - final Terms stringTerms = resp.getAggregations().get("string"); + final Terms stringMapTerms = resp.getAggregations().get("string_map"); + final Terms stringOrdinalsTerms = resp.getAggregations().get("string_ordinals"); assertEquals(valuesSet.size(), longTerms.buckets().size()); assertEquals(valuesSet.size(), doubleTerms.buckets().size()); - assertEquals(valuesSet.size(), stringTerms.buckets().size()); + assertEquals(valuesSet.size(), stringMapTerms.buckets().size()); + assertEquals(valuesSet.size(), stringOrdinalsTerms.buckets().size()); for (Terms.Bucket 
bucket : longTerms.buckets()) { final Terms.Bucket doubleBucket = doubleTerms.getByTerm(Double.toString(Long.parseLong(bucket.getKey().string()))); - final Terms.Bucket stringBucket = stringTerms.getByTerm(bucket.getKey().string()); + final Terms.Bucket stringMapBucket = stringMapTerms.getByTerm(bucket.getKey().string()); + final Terms.Bucket stringOrdinalsBucket = stringOrdinalsTerms.getByTerm(bucket.getKey().string()); assertNotNull(doubleBucket); - assertNotNull(stringBucket); + assertNotNull(stringMapBucket); + assertNotNull(stringOrdinalsBucket); assertEquals(bucket.getDocCount(), doubleBucket.getDocCount()); - assertEquals(bucket.getDocCount(), stringBucket.getDocCount()); + assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount()); + assertEquals(bucket.getDocCount(), stringOrdinalsBucket.getDocCount()); } } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java index bb5121a0754..1b86b1a052a 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java @@ -24,8 +24,10 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.hamcrest.Matchers; @@ -33,10 +35,12 @@ import org.junit.Before; import org.junit.Test; import 
java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.FilterBuilders.termFilter; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.search.aggregations.AggregationBuilders.*; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; @@ -58,6 +62,10 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { .build(); } + private String randomExecutionHint() { + return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS)); + } + @Before public void init() throws Exception { createIndex("idx"); @@ -88,6 +96,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValueField() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value")) .execute().actionGet(); @@ -254,6 +263,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValueField_WithMaxSize() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .size(20) .order(Terms.Order.TERM_ASC)) // we need to sort by terms cause we're checking the first 20 values @@ -278,6 +288,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValueField_OrderedByTermAsc() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .order(Terms.Order.TERM_ASC)) .execute().actionGet(); @@ -302,6 +313,7 @@ public 
class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValueField_OrderedByTermDesc() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .order(Terms.Order.TERM_DESC)) .execute().actionGet(); @@ -326,6 +338,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValuedField_WithSubAggregation() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .subAggregation(count("count").field("values"))) .execute().actionGet(); @@ -352,6 +365,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValuedField_WithSubAggregation_Inherited() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .subAggregation(count("count"))) .execute().actionGet(); @@ -378,6 +392,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void singleValuedField_WithValueScript() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value") .script("'foo_' + _value")) .execute().actionGet(); @@ -401,6 +416,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void multiValuedField_WithValueScript_NotUnique() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("values") .script("_value.substring(0,3)")) .execute().actionGet(); @@ -422,6 +438,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void multiValuedField() 
throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("values")) .execute().actionGet(); @@ -448,6 +465,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void multiValuedField_WithValueScript() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("values") .script("'foo_' + _value")) .execute().actionGet(); @@ -493,6 +511,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void multiValuedField_WithValueScript_WithInheritedSubAggregator() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("values") .script("'foo_' + _value") .subAggregation(count("count"))) @@ -527,6 +546,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void script_SingleValue() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .script("doc['value'].value")) .execute().actionGet(); @@ -549,6 +569,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void script_SingleValue_ExplicitSingleValue() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .script("doc['value'].value")) .execute().actionGet(); @@ -571,6 +592,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void script_SingleValue_WithSubAggregator_Inherited() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) 
.script("doc['value'].value") .subAggregation(count("count"))) .execute().actionGet(); @@ -597,6 +619,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void script_MultiValued() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .script("doc['values'].values")) .execute().actionGet(); @@ -623,6 +646,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void script_MultiValued_WithAggregatorInherited() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .script("doc['values'].values") .subAggregation(count("count"))) .execute().actionGet(); @@ -656,6 +680,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void unmapped() throws Exception { SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value")) .execute().actionGet(); @@ -671,6 +696,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { public void partiallyUnmapped() throws Exception { SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type") .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) .field("value")) .execute().actionGet(); @@ -689,6 +715,30 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { } } + @Test + public void stringTermsNestedIntoPerBucketAggregator() throws Exception { + // no execution hint so that the logic that decides whether or not to use ordinals is executed + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(filter("filter").filter(termFilter("values", "val3")).subAggregation(terms("terms").field("values"))) + .execute().actionGet(); + + 
assertThat(response.getFailedShards(), equalTo(0)); + + Filter filter = response.getAggregations().get("filter"); + + Terms terms = filter.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.buckets().size(), equalTo(3)); + + for (int i = 2; i <= 4; i++) { + Terms.Bucket bucket = terms.getByTerm("val" + i); + assertThat(bucket, notNullValue()); + assertThat(bucket.getKey().string(), equalTo("val" + i)); + assertThat(bucket.getDocCount(), equalTo(i == 3 ? 2L : 1L)); + } + } + @Test public void emptyAggregation() throws Exception { prepareCreate("empty_bucket_idx").addMapping("type", "value", "type=integer").execute().actionGet();