Added global ordinals terms aggregator impl that is optimized for low cardinality fields.
Instead of resolving the global ordinal for each hit on the fly, resolve the global ordinals during post collect. On fields with not so many unique values, that can reduce the number of global ordinal look-ups significantly. Closes #5895 Closes #5854
This commit is contained in:
parent
4df4506875
commit
f3219f7098
|
@ -124,7 +124,7 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static abstract class GlobalOrdinalMapping implements Ordinals.Docs {
|
public static abstract class GlobalOrdinalMapping implements Ordinals.Docs {
|
||||||
|
|
||||||
protected final Ordinals.Docs segmentOrdinals;
|
protected final Ordinals.Docs segmentOrdinals;
|
||||||
private final long memorySizeInBytes;
|
private final long memorySizeInBytes;
|
||||||
|
@ -174,7 +174,7 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
return currentGlobalOrd = getGlobalOrd(segmentOrd);
|
return currentGlobalOrd = getGlobalOrd(segmentOrd);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract long getGlobalOrd(long segmentOrd);
|
public abstract long getGlobalOrd(long segmentOrd);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,7 +289,7 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected long getGlobalOrd(long segmentOrd) {
|
public long getGlobalOrd(long segmentOrd) {
|
||||||
return segmentOrd + segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
return segmentOrd + segmentOrdToGlobalOrdLookup.get(segmentOrd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -323,7 +323,7 @@ public class InternalGlobalOrdinalsBuilder extends AbstractIndexComponent implem
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected long getGlobalOrd(long segmentOrd) {
|
public long getGlobalOrd(long segmentOrd) {
|
||||||
return segmentOrd + segmentOrdToGlobalOrdLookup.get((int) segmentOrd);
|
return segmentOrd + segmentOrdToGlobalOrdLookup.get((int) segmentOrd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,11 +78,8 @@ public abstract class BucketsAggregator extends Aggregator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public LongArray getDocCounts() {
|
||||||
* Initializes the docCounts to the specified size.
|
return docCounts;
|
||||||
*/
|
|
||||||
public void initializeDocCounts(long maxOrd) {
|
|
||||||
docCounts = bigArrays.grow(docCounts, maxOrd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,7 +94,7 @@ public abstract class BucketsAggregator extends Aggregator {
|
||||||
/**
|
/**
|
||||||
* Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal)
|
* Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal)
|
||||||
*/
|
*/
|
||||||
protected final void incrementBucketDocCount(int inc, long bucketOrd) throws IOException {
|
protected final void incrementBucketDocCount(long inc, long bucketOrd) throws IOException {
|
||||||
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
|
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
|
||||||
docCounts.increment(bucketOrd, inc);
|
docCounts.increment(bucketOrd, inc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.elasticsearch.ExceptionsHelper;
|
||||||
import org.elasticsearch.common.lease.Releasables;
|
import org.elasticsearch.common.lease.Releasables;
|
||||||
|
import org.elasticsearch.common.util.LongArray;
|
||||||
import org.elasticsearch.common.util.LongHash;
|
import org.elasticsearch.common.util.LongHash;
|
||||||
import org.elasticsearch.index.fielddata.BytesValues;
|
import org.elasticsearch.index.fielddata.BytesValues;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||||
|
@ -37,6 +39,8 @@ import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import static org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.GlobalOrdinalMapping;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An aggregator of string values that relies on global ordinals in order to build buckets.
|
* An aggregator of string values that relies on global ordinals in order to build buckets.
|
||||||
*/
|
*/
|
||||||
|
@ -47,8 +51,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
protected Ordinals.Docs globalOrdinals;
|
protected Ordinals.Docs globalOrdinals;
|
||||||
|
|
||||||
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
|
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||||
super(name, factories, estimatedBucketCount, aggregationContext, parent, order, requiredSize, shardSize, minDocCount);
|
super(name, factories, maxOrd, aggregationContext, parent, order, requiredSize, shardSize, minDocCount);
|
||||||
this.valuesSource = valuesSource;
|
this.valuesSource = valuesSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,7 +69,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
public void setNextReader(AtomicReaderContext reader) {
|
public void setNextReader(AtomicReaderContext reader) {
|
||||||
globalValues = valuesSource.globalBytesValues();
|
globalValues = valuesSource.globalBytesValues();
|
||||||
globalOrdinals = globalValues.ordinals();
|
globalOrdinals = globalValues.ordinals();
|
||||||
initializeDocCounts(globalOrdinals.getMaxOrd());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -135,9 +138,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
private final LongHash bucketOrds;
|
private final LongHash bucketOrds;
|
||||||
|
|
||||||
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
|
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext,
|
||||||
Aggregator parent) {
|
Aggregator parent) {
|
||||||
super(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
// Set maxOrd to estimatedBucketCount! To be conservative with memory.
|
||||||
|
super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||||
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
|
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,4 +176,78 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Variant of {@link GlobalOrdinalsStringTermsAggregator} that resolves global ordinals post segment collection
|
||||||
|
* instead of on the fly for each match. This is beneficial for low cardinality fields, because it can reduce
|
||||||
|
* the amount of look-ups significantly.
|
||||||
|
*/
|
||||||
|
public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
|
||||||
|
|
||||||
|
private final LongArray segmentDocCounts;
|
||||||
|
|
||||||
|
private Ordinals.Docs segmentOrdinals;
|
||||||
|
private LongArray current;
|
||||||
|
|
||||||
|
public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
|
||||||
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||||
|
super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||||
|
this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, long owningBucketOrdinal) throws IOException {
|
||||||
|
final int numOrds = segmentOrdinals.setDocument(doc);
|
||||||
|
for (int i = 0; i < numOrds; i++) {
|
||||||
|
final long segmentOrd = segmentOrdinals.nextOrd();
|
||||||
|
current.increment(segmentOrd, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(AtomicReaderContext reader) {
|
||||||
|
if (segmentOrdinals != null && segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) {
|
||||||
|
mapSegmentCountsToGlobalCounts();
|
||||||
|
}
|
||||||
|
|
||||||
|
super.setNextReader(reader);
|
||||||
|
BytesValues.WithOrdinals bytesValues = valuesSource.bytesValues();
|
||||||
|
segmentOrdinals = bytesValues.ordinals();
|
||||||
|
if (segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) {
|
||||||
|
current = segmentDocCounts;
|
||||||
|
} else {
|
||||||
|
current = getDocCounts();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doPostCollection() {
|
||||||
|
if (segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) {
|
||||||
|
mapSegmentCountsToGlobalCounts();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doClose() {
|
||||||
|
Releasables.close(segmentDocCounts);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mapSegmentCountsToGlobalCounts() {
|
||||||
|
// There is no public method in Ordinals.Docs that allows for this mapping...
|
||||||
|
// This is the cleanest way I can think of so far
|
||||||
|
GlobalOrdinalMapping mapping = (GlobalOrdinalMapping) globalOrdinals;
|
||||||
|
for (int i = 0; i < segmentDocCounts.size(); i++) {
|
||||||
|
final long inc = segmentDocCounts.set(i, 0);
|
||||||
|
if (inc == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final long globalOrd = mapping.getGlobalOrd(i);
|
||||||
|
try {
|
||||||
|
incrementBucketDocCount(inc, globalOrd);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw ExceptionsHelper.convertToElastic(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.ParseField;
|
import org.elasticsearch.common.ParseField;
|
||||||
import org.elasticsearch.search.aggregations.*;
|
import org.elasticsearch.search.aggregations.*;
|
||||||
|
@ -39,8 +40,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent) {
|
AggregationContext aggregationContext, Aggregator parent) {
|
||||||
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
|
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,8 +55,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent) {
|
AggregationContext aggregationContext, Aggregator parent) {
|
||||||
if (includeExclude != null) {
|
if (includeExclude != null) {
|
||||||
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
||||||
}
|
}
|
||||||
|
@ -72,12 +73,12 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent) {
|
AggregationContext aggregationContext, Aggregator parent) {
|
||||||
if (includeExclude != null) {
|
if (includeExclude != null) {
|
||||||
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
||||||
}
|
}
|
||||||
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -90,12 +91,32 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent) {
|
AggregationContext aggregationContext, Aggregator parent) {
|
||||||
if (includeExclude != null) {
|
if (includeExclude != null) {
|
||||||
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
||||||
}
|
}
|
||||||
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean needsGlobalOrdinals() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
|
||||||
|
AggregationContext aggregationContext, Aggregator parent) {
|
||||||
|
if (includeExclude != null) {
|
||||||
|
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
|
||||||
|
}
|
||||||
|
if (factories != AggregatorFactories.EMPTY) {
|
||||||
|
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode can only be used as a leaf aggregation");
|
||||||
|
}
|
||||||
|
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -120,8 +141,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount,
|
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount,
|
||||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent);
|
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent);
|
||||||
|
|
||||||
abstract boolean needsGlobalOrdinals();
|
abstract boolean needsGlobalOrdinals();
|
||||||
|
|
||||||
|
@ -200,6 +221,18 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
execution = ExecutionMode.MAP;
|
execution = ExecutionMode.MAP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final long maxOrd;
|
||||||
|
final double ratio;
|
||||||
|
if (execution == null || execution.needsGlobalOrdinals()) {
|
||||||
|
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
|
||||||
|
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
|
||||||
|
maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
|
||||||
|
ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs());
|
||||||
|
} else {
|
||||||
|
maxOrd = -1;
|
||||||
|
ratio = -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Let's try to use a good default
|
// Let's try to use a good default
|
||||||
if (execution == null) {
|
if (execution == null) {
|
||||||
// if there is a parent bucket aggregator the number of instances of this aggregator is going
|
// if there is a parent bucket aggregator the number of instances of this aggregator is going
|
||||||
|
@ -208,12 +241,23 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
||||||
if (hasParentBucketAggregator(parent)) {
|
if (hasParentBucketAggregator(parent)) {
|
||||||
execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
|
execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
|
||||||
} else {
|
} else {
|
||||||
execution = ExecutionMode.GLOBAL_ORDINALS;
|
if (factories == AggregatorFactories.EMPTY) {
|
||||||
|
if (ratio <= 0.5 && maxOrd <= 2048) {
|
||||||
|
// 0.5: At least we need reduce the number of global ordinals look-ups by half
|
||||||
|
// 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has additional memory usage, which directly linked to maxOrd, so we need to limit.
|
||||||
|
execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
|
||||||
|
} else {
|
||||||
|
execution = ExecutionMode.GLOBAL_ORDINALS;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
execution = ExecutionMode.GLOBAL_ORDINALS;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert execution != null;
|
assert execution != null;
|
||||||
valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals());
|
valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals());
|
||||||
return execution.create(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
|
return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (includeExclude != null) {
|
if (includeExclude != null) {
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
package org.elasticsearch.search.aggregations.support;
|
package org.elasticsearch.search.aggregations.support;
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefArray;
|
import org.apache.lucene.util.BytesRefArray;
|
||||||
|
@ -29,6 +31,7 @@ import org.elasticsearch.common.lucene.TopReaderContextAware;
|
||||||
import org.elasticsearch.common.util.CollectionUtils;
|
import org.elasticsearch.common.util.CollectionUtils;
|
||||||
import org.elasticsearch.index.fielddata.*;
|
import org.elasticsearch.index.fielddata.*;
|
||||||
import org.elasticsearch.index.fielddata.AtomicFieldData.Order;
|
import org.elasticsearch.index.fielddata.AtomicFieldData.Order;
|
||||||
|
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||||
import org.elasticsearch.script.SearchScript;
|
import org.elasticsearch.script.SearchScript;
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues;
|
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues;
|
||||||
import org.elasticsearch.search.aggregations.support.values.ScriptBytesValues;
|
import org.elasticsearch.search.aggregations.support.values.ScriptBytesValues;
|
||||||
|
@ -159,6 +162,8 @@ public abstract class ValuesSource {
|
||||||
|
|
||||||
public abstract BytesValues.WithOrdinals globalBytesValues();
|
public abstract BytesValues.WithOrdinals globalBytesValues();
|
||||||
|
|
||||||
|
public abstract long globalMaxOrd(IndexSearcher indexSearcher);
|
||||||
|
|
||||||
public static class FieldData extends WithOrdinals implements ReaderContextAware {
|
public static class FieldData extends WithOrdinals implements ReaderContextAware {
|
||||||
|
|
||||||
protected boolean needsHashes;
|
protected boolean needsHashes;
|
||||||
|
@ -229,6 +234,21 @@ public abstract class ValuesSource {
|
||||||
}
|
}
|
||||||
return globalBytesValues;
|
return globalBytesValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long globalMaxOrd(IndexSearcher indexSearcher) {
|
||||||
|
IndexReader indexReader = indexSearcher.getIndexReader();
|
||||||
|
if (indexReader.leaves().isEmpty()) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
AtomicReaderContext atomicReaderContext = indexReader.leaves().get(0);
|
||||||
|
IndexFieldData.WithOrdinals<?> globalFieldData = indexFieldData.loadGlobal(indexReader);
|
||||||
|
AtomicFieldData.WithOrdinals afd = globalFieldData.load(atomicReaderContext);
|
||||||
|
BytesValues.WithOrdinals values = afd.getBytesValues(false);
|
||||||
|
Ordinals.Docs ordinals = values.ordinals();
|
||||||
|
return ordinals.getMaxOrd();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -186,7 +186,7 @@ public class GlobalOrdinalsBenchmark {
|
||||||
fieldName = fieldName + ".doc_values";
|
fieldName = fieldName + ".doc_values";
|
||||||
name = name + "_doc_values"; // can't have . in agg name
|
name = name + "_doc_values"; // can't have . in agg name
|
||||||
}
|
}
|
||||||
stats.add(terms(name, fieldName, "global_ordinals"));
|
stats.add(terms(name, fieldName, "global_ordinals_low_cardinality"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.hamcrest.Matchers;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
@ -59,6 +60,12 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
private static final String MULTI_VALUED_FIELD_NAME = "s_values";
|
private static final String MULTI_VALUED_FIELD_NAME = "s_values";
|
||||||
|
|
||||||
public static String randomExecutionHint() {
|
public static String randomExecutionHint() {
|
||||||
|
EnumSet<ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
|
||||||
|
modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY);
|
||||||
|
return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String randomAllExecutionHint() {
|
||||||
return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
|
return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +113,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
final int minDocCount = randomInt(1);
|
final int minDocCount = randomInt(1);
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.minDocCount(minDocCount)
|
.minDocCount(minDocCount)
|
||||||
.size(0))
|
.size(0))
|
||||||
|
@ -146,14 +153,16 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
@Test
|
@Test
|
||||||
public void singleValueField_withGlobalOrdinals() throws Exception {
|
public void singleValueField_withGlobalOrdinals() throws Exception {
|
||||||
ExecutionMode[] executionModes = new ExecutionMode[] {
|
ExecutionMode[] executionModes = new ExecutionMode[] {
|
||||||
|
null,
|
||||||
ExecutionMode.GLOBAL_ORDINALS,
|
ExecutionMode.GLOBAL_ORDINALS,
|
||||||
ExecutionMode.GLOBAL_ORDINALS_HASH
|
ExecutionMode.GLOBAL_ORDINALS_HASH,
|
||||||
|
ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY
|
||||||
};
|
};
|
||||||
for (ExecutionMode executionMode : executionModes) {
|
for (ExecutionMode executionMode : executionModes) {
|
||||||
logger.info("Execution mode:" + executionMode);
|
logger.info("Execution mode:" + executionMode);
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(executionMode.toString())
|
.executionHint(executionMode == null ? null : executionMode.toString())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME))
|
.field(SINGLE_VALUED_FIELD_NAME))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
assertSearchResponse(response);
|
assertSearchResponse(response);
|
||||||
|
@ -319,7 +328,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void singleValueField_WithMaxSize() throws Exception {
|
public void singleValueField_WithMaxSize() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.size(20)
|
.size(20)
|
||||||
.order(Terms.Order.term(true))) // we need to sort by terms cause we're checking the first 20 values
|
.order(Terms.Order.term(true))) // we need to sort by terms cause we're checking the first 20 values
|
||||||
|
@ -344,7 +353,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void singleValueField_OrderedByTermAsc() throws Exception {
|
public void singleValueField_OrderedByTermAsc() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.order(Terms.Order.term(true)))
|
.order(Terms.Order.term(true)))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -369,7 +378,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void singleValueField_OrderedByTermDesc() throws Exception {
|
public void singleValueField_OrderedByTermDesc() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.order(Terms.Order.term(false)))
|
.order(Terms.Order.term(false)))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -448,7 +457,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void singleValuedField_WithValueScript() throws Exception {
|
public void singleValuedField_WithValueScript() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.script("'foo_' + _value"))
|
.script("'foo_' + _value"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -472,7 +481,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void multiValuedField_WithValueScript_NotUnique() throws Exception {
|
public void multiValuedField_WithValueScript_NotUnique() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(MULTI_VALUED_FIELD_NAME)
|
.field(MULTI_VALUED_FIELD_NAME)
|
||||||
.script("_value.substring(0,3)"))
|
.script("_value.substring(0,3)"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -494,7 +503,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void multiValuedField() throws Exception {
|
public void multiValuedField() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(MULTI_VALUED_FIELD_NAME))
|
.field(MULTI_VALUED_FIELD_NAME))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -521,7 +530,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void multiValuedField_WithValueScript() throws Exception {
|
public void multiValuedField_WithValueScript() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(MULTI_VALUED_FIELD_NAME)
|
.field(MULTI_VALUED_FIELD_NAME)
|
||||||
.script("'foo_' + _value"))
|
.script("'foo_' + _value"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -602,7 +611,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void script_SingleValue() throws Exception {
|
public void script_SingleValue() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
|
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -625,7 +634,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void script_SingleValue_ExplicitSingleValue() throws Exception {
|
public void script_SingleValue_ExplicitSingleValue() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
|
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -675,7 +684,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void script_MultiValued() throws Exception {
|
public void script_MultiValued() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.script("doc['" + MULTI_VALUED_FIELD_NAME + "'].values"))
|
.script("doc['" + MULTI_VALUED_FIELD_NAME + "'].values"))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -736,7 +745,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void unmapped() throws Exception {
|
public void unmapped() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.size(randomInt(5))
|
.size(randomInt(5))
|
||||||
.field(SINGLE_VALUED_FIELD_NAME))
|
.field(SINGLE_VALUED_FIELD_NAME))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
@ -753,7 +762,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
public void partiallyUnmapped() throws Exception {
|
public void partiallyUnmapped() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME))
|
.field(SINGLE_VALUED_FIELD_NAME))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -947,7 +956,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
|
||||||
|
|
||||||
client().prepareSearch("idx").setTypes("type")
|
client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(terms("terms")
|
.addAggregation(terms("terms")
|
||||||
.executionHint(randomExecutionHint())
|
.executionHint(randomAllExecutionHint())
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
.field(SINGLE_VALUED_FIELD_NAME)
|
||||||
.order(Terms.Order.aggregation("avg_i", true))
|
.order(Terms.Order.aggregation("avg_i", true))
|
||||||
).execute().actionGet();
|
).execute().actionGet();
|
||||||
|
|
Loading…
Reference in New Issue