Aggregations: Ordinals-based string bucketing support.
When the ValuesSource has ordinals, terms ordinals are used as a cache key to bucket ordinals. This can make terms aggregations on String terms significantly faster. Close #4350
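In outline, each segment gets a `LongArray` that maps a term's ordinal to its global bucket ordinal, with `-1` standing for "not resolved yet", so the hash lookup that turns term bytes into a bucket runs at most once per distinct term and segment. A minimal sketch of the idea (hypothetical `resolveBucketByBytes` helper; the committed code follows below):

[source,java]
----
// One cache slot per term ordinal of the current segment; -1 = no bucket yet.
long bucketOrd(long ord, LongArray ordinalToBucket) {
    long bucketOrd = ordinalToBucket.get(ord);
    if (bucketOrd < 0) {                        // first time this ordinal is seen
        bucketOrd = resolveBucketByBytes(ord);  // hypothetical BytesRefHash lookup
        ordinalToBucket.set(ord, bucketOrd);    // reused by all later docs
    }
    return bucketOrd;
}
----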
parent cc61348013
commit 36bd9cc432
@@ -236,4 +236,31 @@ http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`L
 http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
 http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
 http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
+http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
+
+==== Execution hint
+
+There are two mechanisms by which terms aggregations can be executed: either by using field values directly in order to aggregate
+data per-bucket (`map`), or by using ordinals of the field values instead of the values themselves (`ordinals`). Although the
+latter execution mode can be expected to be slightly faster, it is only available for use when the underlying data source exposes
+those terms ordinals. Moreover, it may actually be slower if most field values are unique. Elasticsearch tries to have sensible
+defaults when it comes to the execution mode that should be used, but in case you know that an execution mode may perform better
+than the other one, you have the ability to provide Elasticsearch with a hint:
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "tags" : {
+            "terms" : {
+                "field" : "tags",
+                "execution_hint": "map" <1>
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+<1> the possible values are `map` and `ordinals`
+
+Please note that Elasticsearch will ignore this execution hint if it is not applicable.
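The same hint can be set from the Java API through the `executionHint` setter that this commit adds to `TermsBuilder`. A minimal sketch, assuming an existing `client` and an index `idx` with a string `tags` field:

[source,java]
----
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

// Ask for ordinals-based execution explicitly; per the note above,
// Elasticsearch falls back to "map" when the hint is not applicable.
SearchResponse response = client.prepareSearch("idx")
        .addAggregation(terms("tags")
                .executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS)
                .field("tags"))
        .execute().actionGet();
----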
@@ -131,6 +131,13 @@ public enum BigArrays {
             assert indexIsInt(index);
             return array[(int) index] += inc;
         }
+
+        @Override
+        public void fill(long fromIndex, long toIndex, long value) {
+            assert indexIsInt(fromIndex);
+            assert indexIsInt(toIndex);
+            Arrays.fill(array, (int) fromIndex, (int) toIndex, value);
+        }
     }
 
     private static class DoubleArrayWrapper implements DoubleArray {

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.common.util;
 
+import com.google.common.base.Preconditions;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 

@@ -92,4 +93,20 @@ final class BigLongArray extends AbstractBigArray implements LongArray {
         this.size = newSize;
     }
 
+    @Override
+    public void fill(long fromIndex, long toIndex, long value) {
+        Preconditions.checkArgument(fromIndex <= toIndex);
+        final int fromPage = pageIndex(fromIndex);
+        final int toPage = pageIndex(toIndex - 1);
+        if (fromPage == toPage) {
+            Arrays.fill(pages[fromPage], indexInPage(fromIndex), indexInPage(toIndex - 1) + 1, value);
+        } else {
+            Arrays.fill(pages[fromPage], indexInPage(fromIndex), pages[fromPage].length, value);
+            for (int i = fromPage + 1; i < toPage; ++i) {
+                Arrays.fill(pages[i], value);
+            }
+            Arrays.fill(pages[toPage], 0, indexInPage(toIndex - 1) + 1, value);
+        }
+    }
+
 }
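The paged `fill` above covers three regions: the tail of the first page, any whole pages in between, and the head of the last page; the bounds follow `java.util.Arrays.fill` (`fromIndex` inclusive, `toIndex` exclusive). A minimal usage sketch with an assumed ordinal count, matching how the aggregator below resets its cache:

[source,java]
----
long maxOrd = 1 << 20;                                // assumed for the sketch
LongArray ordinalToBucket = BigArrays.newLongArray(maxOrd);
ordinalToBucket.fill(0, ordinalToBucket.size(), -1L); // -1 = "no bucket yet"
----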
@@ -39,4 +39,9 @@ public interface LongArray extends BigArray {
      */
     public abstract long increment(long index, long inc);
 
+    /**
+     * Fill slots between <code>fromIndex</code> inclusive to <code>toIndex</code> exclusive with <code>value</code>.
+     */
+    public abstract void fill(long fromIndex, long toIndex, long value);
+
 }

@@ -19,9 +19,14 @@
 
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.elasticsearch.common.lucene.ReaderContextAware;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.LongArray;
 import org.elasticsearch.index.fielddata.BytesValues;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;

@@ -29,6 +34,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriority
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
 
 import java.io.IOException;
 import java.util.Arrays;

@@ -37,7 +43,6 @@ import java.util.Collections;
 /**
  * An aggregator of string values.
  */
-// TODO we need a similar aggregator that would use ords, similarly to TermsStringOrdinalsFacetExecutor
 public class StringTermsAggregator extends BucketsAggregator {
 
     private static final int INITIAL_CAPACITY = 50; // TODO sizing

@@ -46,7 +51,7 @@ public class StringTermsAggregator extends BucketsAggregator {
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
-    private final BytesRefHash bucketOrds;
+    protected final BytesRefHash bucketOrds;
     private final IncludeExclude includeExclude;
 
     public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,

@@ -130,5 +135,56 @@ public class StringTermsAggregator extends BucketsAggregator {
         return new StringTerms(name, order, requiredSize, Collections.<InternalTerms.Bucket>emptyList());
     }
 
+    /**
+     * Extension of StringTermsAggregator that caches bucket ords using terms ordinals.
+     */
+    public static class WithOrdinals extends StringTermsAggregator implements ReaderContextAware {
+
+        private final BytesValuesSource.WithOrdinals valuesSource;
+        private BytesValues.WithOrdinals bytesValues;
+        private Ordinals.Docs ordinals;
+        private LongArray ordinalToBucket;
+
+        public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, InternalOrder order, int requiredSize,
+                int shardSize, AggregationContext aggregationContext, Aggregator parent) {
+            super(name, factories, valuesSource, order, requiredSize, shardSize, null, aggregationContext, parent);
+            this.valuesSource = valuesSource;
+        }
+
+        @Override
+        public void setNextReader(AtomicReaderContext reader) {
+            bytesValues = valuesSource.bytesValues();
+            ordinals = bytesValues.ordinals();
+            final long maxOrd = ordinals.getMaxOrd();
+            if (ordinalToBucket == null || ordinalToBucket.size() < maxOrd) {
+                ordinalToBucket = BigArrays.newLongArray(BigArrays.overSize(maxOrd));
+            }
+            ordinalToBucket.fill(0, maxOrd, -1L);
+        }
+
+        @Override
+        public void collect(int doc, long owningBucketOrdinal) throws IOException {
+            assert owningBucketOrdinal == 0;
+            final int valuesCount = ordinals.setDocument(doc);
+
+            for (int i = 0; i < valuesCount; ++i) {
+                final long ord = ordinals.nextOrd();
+                long bucketOrd = ordinalToBucket.get(ord);
+                if (bucketOrd < 0) { // unlikely condition on a low-cardinality field
+                    final BytesRef bytes = bytesValues.getValueByOrd(ord);
+                    final int hash = bytesValues.currentValueHash();
+                    assert hash == bytes.hashCode();
+                    bucketOrd = bucketOrds.add(bytes, hash);
+                    if (bucketOrd < 0) { // already seen in another segment
+                        bucketOrd = -1 - bucketOrd;
+                    }
+                    ordinalToBucket.set(ord, bucketOrd);
+                }
+
+                collectBucket(doc, bucketOrd);
+            }
+        }
+    }
 
 }
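The `collect` method above leans on a convention of `BytesRefHash.add` that is easy to miss: a non-negative return is the ord newly assigned to the key, while a negative return `r` means the key already exists under ord `-1 - r`. Restated as a sketch, with the same names as the code above:

[source,java]
----
// add() returns a fresh ord for a new term, or (-1 - existingOrd)
// for a term that some earlier segment already registered.
long bucketOrd = bucketOrds.add(bytes, hash);
if (bucketOrd < 0) {            // already present
    bucketOrd = -1 - bucketOrd; // decode back to the original bucket ordinal
}
----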
@@ -19,8 +19,10 @@
 
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.search.aggregations.AggregationExecutionException;
 import org.elasticsearch.search.aggregations.Aggregator;
+import org.elasticsearch.search.aggregations.Aggregator.BucketAggregationMode;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory;

@@ -34,17 +36,22 @@ import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource
  */
 public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
 
+    public static final String EXECUTION_HINT_VALUE_MAP = "map";
+    public static final String EXECUTION_HINT_VALUE_ORDINALS = "ordinals";
+
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
     private final IncludeExclude includeExclude;
+    private final String executionHint;
 
-    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude) {
+    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) {
         super(name, StringTerms.TYPE.name(), valueSourceConfig);
         this.order = order;
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
         this.includeExclude = includeExclude;
+        this.executionHint = executionHint;
     }
 
     @Override

@@ -52,10 +59,46 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
         return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent);
     }
 
+    private static boolean hasParentBucketAggregator(Aggregator parent) {
+        if (parent == null) {
+            return false;
+        } else if (parent.bucketAggregationMode() == BucketAggregationMode.PER_BUCKET) {
+            return true;
+        } else {
+            return hasParentBucketAggregator(parent.parent());
+        }
+    }
+
     @Override
     protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
         if (valuesSource instanceof BytesValuesSource) {
-            return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
+            if (executionHint != null && !executionHint.equals(EXECUTION_HINT_VALUE_MAP) && !executionHint.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
+                throw new ElasticSearchIllegalArgumentException("execution_hint can only be '" + EXECUTION_HINT_VALUE_MAP + "' or '" + EXECUTION_HINT_VALUE_ORDINALS + "', not " + executionHint);
+            }
+            String execution = executionHint;
+            if (!(valuesSource instanceof BytesValuesSource.WithOrdinals)) {
+                execution = EXECUTION_HINT_VALUE_MAP;
+            } else if (includeExclude != null) {
+                execution = EXECUTION_HINT_VALUE_MAP;
+            }
+            if (execution == null) {
+                if ((valuesSource instanceof BytesValuesSource.WithOrdinals)
+                        && !hasParentBucketAggregator(parent)) {
+                    execution = EXECUTION_HINT_VALUE_ORDINALS;
+                } else {
+                    execution = EXECUTION_HINT_VALUE_MAP;
+                }
+            }
+            assert execution != null;
+
+            if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
+                assert includeExclude == null;
+                final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name, factories, (BytesValuesSource.WithOrdinals) valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
+                aggregationContext.registerReaderContextAware(aggregator);
+                return aggregator;
+            } else {
+                return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
+            }
+        }
 
         if (includeExclude != null) {
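In short, the resolution above is: an invalid hint fails fast; `ordinals` is never used without an ordinals-capable source or when include/exclude filtering is requested; and absent any hint, ordinals win unless an ancestor aggregator runs in `PER_BUCKET` mode, which would require one ordinal-to-bucket table per bucket. A condensed restatement with hypothetical boolean parameters:

[source,java]
----
// Hypothetical condensed form of the hint resolution in create() above.
static String resolveExecution(String hint, boolean withOrdinals,
                               boolean hasIncludeExclude, boolean underPerBucketParent) {
    if (!withOrdinals || hasIncludeExclude) {
        return "map";                                  // ordinals impossible or unsupported
    }
    if (hint == null) {
        return underPerBucketParent ? "map" : "ordinals";
    }
    return hint;                                       // explicit, already validated
}
----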
@@ -20,6 +20,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
     private int includeFlags;
     private String excludePattern;
     private int excludeFlags;
+    private String executionHint;
 
     public TermsBuilder(String name) {
         super(name, "terms");

@@ -102,6 +103,11 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
         return this;
     }
 
+    public TermsBuilder executionHint(String executionHint) {
+        this.executionHint = executionHint;
+        return this;
+    }
+
     @Override
     protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException {
         if (size >=0) {

@@ -137,6 +143,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
                         .endObject();
             }
         }
+        if (executionHint != null) {
+            builder.field("execution_hint", executionHint);
+        }
         return builder;
     }
 }

@@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser {
         int includeFlags = 0; // 0 means no flags
         String exclude = null;
         int excludeFlags = 0; // 0 means no flags
+        String executionHint = null;
 
 
         XContentParser.Token token;

@@ -94,6 +95,8 @@ public class TermsParser implements Aggregator.Parser {
                     include = parser.text();
                 } else if ("exclude".equals(currentFieldName)) {
                     exclude = parser.text();
+                } else if ("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) {
+                    executionHint = parser.text();
                 }
             } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
                 if ("script_values_unique".equals(currentFieldName)) {

@@ -192,14 +195,14 @@ public class TermsParser implements Aggregator.Parser {
             if (!assumeUnique) {
                 config.ensureUnique(true);
             }
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
         }
 
         FieldMapper<?> mapper = context.smartNameFieldMapper(field);
         if (mapper == null) {
             ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
             config.unmapped(true);
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
         }
         IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
 

@@ -241,7 +244,7 @@ public class TermsParser implements Aggregator.Parser {
             config.ensureUnique(true);
         }
 
-        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
+        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
     }
 
     static InternalOrder resolveOrder(String key, boolean asc) {

@@ -27,9 +27,11 @@ import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.cache.recycler.CacheRecycler;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.ScorerAware;
+import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
 import org.elasticsearch.index.fielddata.IndexNumericFieldData;
 import org.elasticsearch.search.aggregations.AggregationExecutionException;
+import org.elasticsearch.search.aggregations.support.FieldDataSource.Uniqueness;
 import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
 import org.elasticsearch.search.aggregations.support.geopoints.GeoPointValuesSource;
 import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;

@@ -160,10 +162,15 @@ public class AggregationContext implements ReaderContextAware, ScorerAware {
         return new NumericValuesSource(dataSource, config.formatter(), config.parser());
     }
 
-    private BytesValuesSource bytesField(ObjectObjectOpenHashMap<String, FieldDataSource> fieldDataSources, ValuesSourceConfig<?> config) {
+    private ValuesSource bytesField(ObjectObjectOpenHashMap<String, FieldDataSource> fieldDataSources, ValuesSourceConfig<?> config) {
         FieldDataSource dataSource = fieldDataSources.get(config.fieldContext.field());
         if (dataSource == null) {
-            dataSource = new FieldDataSource.Bytes.FieldData(config.fieldContext.indexFieldData());
+            final IndexFieldData<?> indexFieldData = config.fieldContext.indexFieldData();
+            if (indexFieldData instanceof IndexFieldData.WithOrdinals<?>) {
+                dataSource = new FieldDataSource.Bytes.WithOrdinals.FieldData((IndexFieldData.WithOrdinals<?>) indexFieldData);
+            } else {
+                dataSource = new FieldDataSource.Bytes.FieldData(indexFieldData);
+            }
             setReaderIfNeeded((ReaderContextAware) dataSource);
             readerAwares.add((ReaderContextAware) dataSource);
             fieldDataSources.put(config.fieldContext.field(), dataSource);

@@ -178,14 +185,19 @@ public class AggregationContext implements ReaderContextAware, ScorerAware {
         // Even in case we wrap field data, we might still need to wrap for sorting, because the wrapped field data might be
         // eg. a numeric field data that doesn't sort according to the byte order. However field data values are unique so no
         // need to wrap for uniqueness
-        if ((config.ensureUnique && !(dataSource instanceof FieldDataSource.Bytes.FieldData)) || config.ensureSorted) {
+        if ((config.ensureUnique && dataSource.getUniqueness() != Uniqueness.UNIQUE) || config.ensureSorted) {
             dataSource = new FieldDataSource.Bytes.SortedAndUnique(dataSource);
             readerAwares.add((ReaderContextAware) dataSource);
         }
 
         if (config.needsHashes) { // the data source needs hash if at least one consumer needs hashes
             dataSource.setNeedsHashes(true);
         }
-        return new BytesValuesSource(dataSource);
+        if (dataSource instanceof FieldDataSource.Bytes.WithOrdinals) {
+            return new BytesValuesSource.WithOrdinals((FieldDataSource.Bytes.WithOrdinals) dataSource);
+        } else {
+            return new BytesValuesSource(dataSource);
+        }
     }
 
     private BytesValuesSource bytesScript(ValuesSourceConfig<?> config) {

@@ -58,6 +58,51 @@ public abstract class FieldDataSource {
 
     public static abstract class Bytes extends FieldDataSource {
 
+        public static abstract class WithOrdinals extends Bytes {
+
+            public abstract BytesValues.WithOrdinals bytesValues();
+
+            public static class FieldData extends WithOrdinals implements ReaderContextAware {
+
+                protected boolean needsHashes;
+                protected final IndexFieldData.WithOrdinals<?> indexFieldData;
+                protected AtomicFieldData.WithOrdinals<?> atomicFieldData;
+                private BytesValues.WithOrdinals bytesValues;
+
+                public FieldData(IndexFieldData.WithOrdinals<?> indexFieldData) {
+                    this.indexFieldData = indexFieldData;
+                    needsHashes = false;
+                }
+
+                @Override
+                public Uniqueness getUniqueness() {
+                    return Uniqueness.UNIQUE;
+                }
+
+                public final void setNeedsHashes(boolean needsHashes) {
+                    this.needsHashes = needsHashes;
+                }
+
+                @Override
+                public void setNextReader(AtomicReaderContext reader) {
+                    atomicFieldData = indexFieldData.load(reader);
+                    if (bytesValues != null) {
+                        bytesValues = atomicFieldData.getBytesValues(needsHashes);
+                    }
+                }
+
+                @Override
+                public BytesValues.WithOrdinals bytesValues() {
+                    if (bytesValues == null) {
+                        bytesValues = atomicFieldData.getBytesValues(needsHashes);
+                    }
+                    return bytesValues;
+                }
+
+            }
+
+        }
+
         public static class FieldData extends Bytes implements ReaderContextAware {
 
             protected boolean needsHashes;
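A note on the nested `FieldData` sources above: per-segment values are materialized lazily. `bytesValues()` loads them on first use, and `setNextReader` reloads them only if a consumer already asked for them, so sources that are never read pay nothing per segment. A generic sketch of the pattern, with `Segment`/`Values` as hypothetical stand-ins for `AtomicFieldData`/`BytesValues`:

[source,java]
----
interface Values {}
interface Segment { Values load(); }

final class LazyPerSegmentValues {
    private Values values; // null until a consumer asks

    Values get(Segment current) {
        if (values == null) {
            values = current.load(); // first access materializes
        }
        return values;
    }

    void onNewSegment(Segment next) {
        if (values != null) {        // refresh only if already materialized
            values = next.load();
        }
    }
}
----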
@@ -26,7 +26,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSource;
 /**
  *
  */
-public final class BytesValuesSource implements ValuesSource {
+public class BytesValuesSource implements ValuesSource {
 
     private final FieldDataSource source;
 

@@ -39,4 +39,20 @@ public final class BytesValuesSource implements ValuesSource {
         return source.bytesValues();
     }
 
+    public static final class WithOrdinals extends BytesValuesSource {
+
+        private final FieldDataSource.Bytes.WithOrdinals source;
+
+        public WithOrdinals(FieldDataSource.Bytes.WithOrdinals source) {
+            super(source);
+            this.source = source;
+        }
+
+        @Override
+        public BytesValues.WithOrdinals bytesValues() {
+            return source.bytesValues();
+        }
+
+    }
+
 }

@@ -81,7 +81,7 @@ public class TermsAggregationSearchBenchmark {
     AGGREGATION {
         @Override
         SearchRequestBuilder addTermsAgg(SearchRequestBuilder builder, String name, String field, String executionHint) {
-            return builder.addAggregation(AggregationBuilders.terms(name).field(field));
+            return builder.addAggregation(AggregationBuilders.terms(name).executionHint(executionHint).field(field));
         }
 
         @Override

@@ -234,6 +234,8 @@ public class TermsAggregationSearchBenchmark {
         stats.add(terms("terms_facet_map_s_dv", Method.FACET, "s_value_dv", "map"));
         stats.add(terms("terms_agg_s", Method.AGGREGATION, "s_value", null));
         stats.add(terms("terms_agg_s_dv", Method.AGGREGATION, "s_value_dv", null));
+        stats.add(terms("terms_agg_map_s", Method.AGGREGATION, "s_value", "map"));
+        stats.add(terms("terms_agg_map_s_dv", Method.AGGREGATION, "s_value_dv", "map"));
         stats.add(terms("terms_facet_l", Method.FACET, "l_value", null));
         stats.add(terms("terms_facet_l_dv", Method.FACET, "l_value_dv", null));
         stats.add(terms("terms_agg_l", Method.AGGREGATION, "l_value", null));

@@ -244,6 +246,8 @@ public class TermsAggregationSearchBenchmark {
         stats.add(terms("terms_facet_map_sm_dv", Method.FACET, "sm_value_dv", "map"));
         stats.add(terms("terms_agg_sm", Method.AGGREGATION, "sm_value", null));
         stats.add(terms("terms_agg_sm_dv", Method.AGGREGATION, "sm_value_dv", null));
+        stats.add(terms("terms_agg_map_sm", Method.AGGREGATION, "sm_value", "map"));
+        stats.add(terms("terms_agg_map_sm_dv", Method.AGGREGATION, "sm_value_dv", "map"));
        stats.add(terms("terms_facet_lm", Method.FACET, "lm_value", null));
        stats.add(terms("terms_facet_lm_dv", Method.FACET, "lm_value_dv", null));
        stats.add(terms("terms_agg_lm", Method.AGGREGATION, "lm_value", null));

@@ -109,4 +109,24 @@ public class BigArraysTests extends ElasticsearchTestCase {
         }
     }
 
+    public void testLongArrayFill() {
+        final int len = randomIntBetween(1, 100000);
+        final int fromIndex = randomIntBetween(0, len - 1);
+        final int toIndex = randomBoolean()
+            ? Math.min(fromIndex + randomInt(100), len) // single page
+            : randomIntBetween(fromIndex, len); // likely multiple pages
+        final LongArray array2 = BigArrays.newLongArray(len);
+        final long[] array1 = new long[len];
+        for (int i = 0; i < len; ++i) {
+            array1[i] = randomLong();
+            array2.set(i, array1[i]);
+        }
+        final long rand = randomLong();
+        Arrays.fill(array1, fromIndex, toIndex, rand);
+        array2.fill(fromIndex, toIndex, rand);
+        for (int i = 0; i < len; ++i) {
+            assertEquals(array1[i], array2.get(i));
+        }
+    }
+
 }

@@ -28,11 +28,12 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.query.FilterBuilders;
 import org.elasticsearch.index.query.RangeFilterBuilder;
+import org.elasticsearch.search.aggregations.bucket.filter.Filter;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.range.Range;
 import org.elasticsearch.search.aggregations.bucket.range.RangeBuilder;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
-import org.elasticsearch.search.aggregations.bucket.filter.Filter;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

@@ -194,23 +195,29 @@ public class RandomTests extends ElasticsearchIntegrationTest {
         SearchResponse resp = client().prepareSearch("idx")
                 .addAggregation(terms("long").field("long_values").size(maxNumTerms).subAggregation(min("min").field("num")))
                 .addAggregation(terms("double").field("double_values").size(maxNumTerms).subAggregation(max("max").field("num")))
-                .addAggregation(terms("string").field("string_values").size(maxNumTerms).subAggregation(stats("stats").field("num"))).execute().actionGet();
+                .addAggregation(terms("string_map").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP).size(maxNumTerms).subAggregation(stats("stats").field("num")))
+                .addAggregation(terms("string_ordinals").field("string_values").executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS).size(maxNumTerms).subAggregation(extendedStats("stats").field("num"))).execute().actionGet();
         assertEquals(0, resp.getFailedShards());
 
         final Terms longTerms = resp.getAggregations().get("long");
         final Terms doubleTerms = resp.getAggregations().get("double");
-        final Terms stringTerms = resp.getAggregations().get("string");
+        final Terms stringMapTerms = resp.getAggregations().get("string_map");
+        final Terms stringOrdinalsTerms = resp.getAggregations().get("string_ordinals");
 
         assertEquals(valuesSet.size(), longTerms.buckets().size());
         assertEquals(valuesSet.size(), doubleTerms.buckets().size());
-        assertEquals(valuesSet.size(), stringTerms.buckets().size());
+        assertEquals(valuesSet.size(), stringMapTerms.buckets().size());
+        assertEquals(valuesSet.size(), stringOrdinalsTerms.buckets().size());
         for (Terms.Bucket bucket : longTerms.buckets()) {
             final Terms.Bucket doubleBucket = doubleTerms.getByTerm(Double.toString(Long.parseLong(bucket.getKey().string())));
-            final Terms.Bucket stringBucket = stringTerms.getByTerm(bucket.getKey().string());
+            final Terms.Bucket stringMapBucket = stringMapTerms.getByTerm(bucket.getKey().string());
+            final Terms.Bucket stringOrdinalsBucket = stringOrdinalsTerms.getByTerm(bucket.getKey().string());
             assertNotNull(doubleBucket);
-            assertNotNull(stringBucket);
+            assertNotNull(stringMapBucket);
+            assertNotNull(stringOrdinalsBucket);
             assertEquals(bucket.getDocCount(), doubleBucket.getDocCount());
-            assertEquals(bucket.getDocCount(), stringBucket.getDocCount());
+            assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount());
+            assertEquals(bucket.getDocCount(), stringOrdinalsBucket.getDocCount());
         }
     }
 

@@ -24,8 +24,10 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.search.aggregations.bucket.filter.Filter;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
 import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.hamcrest.Matchers;

@@ -33,10 +35,12 @@ import org.junit.Before;
 import org.junit.Test;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Pattern;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.FilterBuilders.termFilter;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;

@@ -58,6 +62,10 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                 .build();
     }
 
+    private String randomExecutionHint() {
+        return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS));
+    }
+
     @Before
     public void init() throws Exception {
         createIndex("idx");

@@ -88,6 +96,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValueField() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value"))
                 .execute().actionGet();
 

@@ -254,6 +263,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValueField_WithMaxSize() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .size(20)
                         .order(Terms.Order.TERM_ASC)) // we need to sort by terms cause we're checking the first 20 values

@@ -278,6 +288,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValueField_OrderedByTermAsc() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .order(Terms.Order.TERM_ASC))
                 .execute().actionGet();

@@ -302,6 +313,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValueField_OrderedByTermDesc() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .order(Terms.Order.TERM_DESC))
                 .execute().actionGet();

@@ -326,6 +338,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValuedField_WithSubAggregation() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .subAggregation(count("count").field("values")))
                 .execute().actionGet();

@@ -352,6 +365,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValuedField_WithSubAggregation_Inherited() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .subAggregation(count("count")))
                 .execute().actionGet();

@@ -378,6 +392,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void singleValuedField_WithValueScript() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value")
                         .script("'foo_' + _value"))
                 .execute().actionGet();

@@ -401,6 +416,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void multiValuedField_WithValueScript_NotUnique() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("values")
                         .script("_value.substring(0,3)"))
                 .execute().actionGet();

@@ -422,6 +438,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void multiValuedField() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("values"))
                 .execute().actionGet();
 

@@ -448,6 +465,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void multiValuedField_WithValueScript() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("values")
                         .script("'foo_' + _value"))
                 .execute().actionGet();

@@ -493,6 +511,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void multiValuedField_WithValueScript_WithInheritedSubAggregator() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("values")
                         .script("'foo_' + _value")
                         .subAggregation(count("count")))

@@ -527,6 +546,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void script_SingleValue() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .script("doc['value'].value"))
                 .execute().actionGet();
 

@@ -549,6 +569,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void script_SingleValue_ExplicitSingleValue() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .script("doc['value'].value"))
                 .execute().actionGet();
 

@@ -571,6 +592,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void script_SingleValue_WithSubAggregator_Inherited() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .script("doc['value'].value")
                         .subAggregation(count("count")))
                 .execute().actionGet();

@@ -597,6 +619,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void script_MultiValued() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .script("doc['values'].values"))
                 .execute().actionGet();
 

@@ -623,6 +646,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void script_MultiValued_WithAggregatorInherited() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .script("doc['values'].values")
                         .subAggregation(count("count")))
                 .execute().actionGet();

@@ -656,6 +680,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void unmapped() throws Exception {
         SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value"))
                 .execute().actionGet();
 

@@ -671,6 +696,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
     public void partiallyUnmapped() throws Exception {
         SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
                         .field("value"))
                 .execute().actionGet();
 

@@ -689,6 +715,30 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void stringTermsNestedIntoPerBucketAggregator() throws Exception {
+        // no execution hint so that the logic that decides whether or not to use ordinals is executed
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .addAggregation(filter("filter").filter(termFilter("values", "val3")).subAggregation(terms("terms").field("values")))
+                .execute().actionGet();
+
+        assertThat(response.getFailedShards(), equalTo(0));
+
+        Filter filter = response.getAggregations().get("filter");
+
+        Terms terms = filter.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.buckets().size(), equalTo(3));
+
+        for (int i = 2; i <= 4; i++) {
+            Terms.Bucket bucket = terms.getByTerm("val" + i);
+            assertThat(bucket, notNullValue());
+            assertThat(bucket.getKey().string(), equalTo("val" + i));
+            assertThat(bucket.getDocCount(), equalTo(i == 3 ? 2L : 1L));
+        }
+    }
+
     @Test
     public void emptyAggregation() throws Exception {
         prepareCreate("empty_bucket_idx").addMapping("type", "value", "type=integer").execute().actionGet();