Added include and exclude support to the terms and significant terms aggregations when they are executed on global ordinals.
Closes #6000
commit 64c43c6dc0 (parent 7980911d96)
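Before this change, setting an include/exclude filter forced the terms and significant_terms aggregations back to the "map" execution mode (the factories below used to throw "cannot filter terms" for the ordinal-based modes). A rough usage sketch of what the commit enables, written in the style of the integration tests touched further down; the index name, field name and the "global_ordinals" hint string are assumptions for illustration, not taken from this diff:

    // Sketch only: regex filtering now combines with a global-ordinals execution hint
    // instead of silently falling back to the "map" mode. Names below are made up.
    SearchResponse response = client().prepareSearch("articles")
            .setQuery(new TermQueryBuilder("_all", "weller"))
            .addAggregation(new SignificantTermsBuilder("mySignificantTerms")
                    .field("description")
                    .executionHint("global_ordinals")            // assumed hint value
                    .include("wel.+", Pattern.CASE_INSENSITIVE)  // java.util.regex flags
                    .exclude("weller"))
            .get();
    SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");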
GlobalOrdinalsSignificantTermsAggregator.java

@@ -18,7 +18,6 @@
 */
package org.elasticsearch.search.aggregations.bucket.significant;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasables;

@@ -27,6 +26,7 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
+import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.ContextIndexSearcher;

@@ -46,11 +46,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri

    public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
            long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount,
-           AggregationContext aggregationContext, Aggregator parent,
+           IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
            SignificantTermsAggregatorFactory termsAggFactory) {
-       super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize,
-               minDocCount, aggregationContext, parent);
+       super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
        this.termsAggFactory = termsAggFactory;
        this.shardMinDocCount = shardMinDocCount;
    }

@@ -80,8 +79,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri

        BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
        SignificantStringTerms.Bucket spare = null;
-       for (long termOrd = Ordinals.MIN_ORDINAL; termOrd < globalOrdinals.getMaxOrd(); ++termOrd) {
-           final long bucketOrd = getBucketOrd(termOrd);
+       for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
+           final long bucketOrd = getBucketOrd(globalTermOrd);
            final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
            if (minDocCount > 0 && bucketDocCount == 0) {
                continue;

@@ -90,7 +89,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
                spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
            }
            spare.bucketOrd = bucketOrd;
-           copy(globalValues.getValueByOrd(termOrd), spare.termBytes);
+           copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes);
            spare.subsetDf = bucketDocCount;
            spare.subsetSize = subsetSize;
            spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);

@@ -136,17 +135,11 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri

        private final LongHash bucketOrds;

-       public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
-           super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggFactory);
+       public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
+           super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggFactory);
            bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
        }

-       @Override
-       public void setNextReader(AtomicReaderContext reader) {
-           globalValues = valuesSource.globalBytesValues();
-           globalOrdinals = globalValues.ordinals();
-       }
-
        @Override
        public void collect(int doc, long owningBucketOrdinal) throws IOException {
            numCollectedDocs++;
SignificantTermsAggregatorFactory.java

@@ -87,13 +87,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
        Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
                AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
-           if (includeExclude != null) {
-               throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-           }
            ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
            IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
            long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
-           return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
+           return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
        }

        @Override

@@ -108,10 +105,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
        Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
                AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
-           if (includeExclude != null) {
-               throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-           }
-           return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
+           return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
        }

        @Override

@@ -201,8 +195,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
-       } else if (includeExclude != null) {
-           execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.hasParentBucketAggregator(parent)) {
SignificantTermsBuilder.java

@@ -38,6 +38,10 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
    private int minDocCount = SignificantTermsParser.DEFAULT_MIN_DOC_COUNT;
    private int shardMinDocCount = SignificantTermsParser.DEFAULT_SHARD_MIN_DOC_COUNT;
    private String executionHint;
+   private String includePattern;
+   private int includeFlags;
+   private String excludePattern;
+   private int excludeFlags;

    public SignificantTermsBuilder(String name) {
        super(name, SignificantStringTerms.TYPE.name());

@@ -73,6 +77,50 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
        return this;
    }

+   /**
+    * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
+    * on the {@link java.util.regex.Pattern} class.
+    *
+    * @see #include(String, int)
+    */
+   public SignificantTermsBuilder include(String regex) {
+       return include(regex, 0);
+   }
+
+   /**
+    * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
+    * on the {@link java.util.regex.Pattern} class.
+    *
+    * @see java.util.regex.Pattern#compile(String, int)
+    */
+   public SignificantTermsBuilder include(String regex, int flags) {
+       this.includePattern = regex;
+       this.includeFlags = flags;
+       return this;
+   }
+
+   /**
+    * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
+    * expression is based on the {@link java.util.regex.Pattern} class.
+    *
+    * @see #exclude(String, int)
+    */
+   public SignificantTermsBuilder exclude(String regex) {
+       return exclude(regex, 0);
+   }
+
+   /**
+    * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
+    * expression is based on the {@link java.util.regex.Pattern} class.
+    *
+    * @see java.util.regex.Pattern#compile(String, int)
+    */
+   public SignificantTermsBuilder exclude(String regex, int flags) {
+       this.excludePattern = regex;
+       this.excludeFlags = flags;
+       return this;
+   }
+
    @Override
    protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();

@@ -94,6 +142,26 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
        if (executionHint != null) {
            builder.field("execution_hint", executionHint);
        }
+       if (includePattern != null) {
+           if (includeFlags == 0) {
+               builder.field("include", includePattern);
+           } else {
+               builder.startObject("include")
+                       .field("pattern", includePattern)
+                       .field("flags", includeFlags)
+                       .endObject();
+           }
+       }
+       if (excludePattern != null) {
+           if (excludeFlags == 0) {
+               builder.field("exclude", excludePattern);
+           } else {
+               builder.startObject("exclude")
+                       .field("pattern", excludePattern)
+                       .field("flags", excludeFlags)
+                       .endObject();
+           }
+       }

        return builder.endObject();
    }
GlobalOrdinalsStringTermsAggregator.java

@@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.lease.Releasables;

@@ -34,6 +35,7 @@ import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
+import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;

@@ -46,13 +48,25 @@ import java.util.Arrays;
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {

    protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
+   private final IncludeExclude includeExclude;

    protected BytesValues.WithOrdinals globalValues;
    protected Ordinals.Docs globalOrdinals;

+   // TODO: cache the acceptedGlobalOrdinals per aggregation definition.
+   // We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
+   // If aggs with same field, but different include/exclude are defined, then the last defined one will override the
+   // first defined one.
+   // So currently for each instance of this aggregator the acceptedGlobalOrdinals will be computed, this is unnecessary
+   // especially if this agg is on a second layer or deeper.
+   private LongBitSet acceptedGlobalOrdinals;
+
    public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
-           long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
+           long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount,
+           IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
        super(name, factories, maxOrd, aggregationContext, parent, order, requiredSize, shardSize, minDocCount);
        this.valuesSource = valuesSource;
+       this.includeExclude = includeExclude;
    }

    protected long getBucketOrd(long termOrd) {

@@ -68,6 +82,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
    public void setNextReader(AtomicReaderContext reader) {
        globalValues = valuesSource.globalBytesValues();
        globalOrdinals = globalValues.ordinals();
+       if (acceptedGlobalOrdinals != null) {
+           globalOrdinals = new FilteredOrdinals(globalOrdinals, acceptedGlobalOrdinals);
+       } else if (includeExclude != null) {
+           acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrdinals, valuesSource);
+           globalOrdinals = new FilteredOrdinals(globalOrdinals, acceptedGlobalOrdinals);
+       }
    }

    @Override

@@ -103,8 +123,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
        }
        BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
        StringTerms.Bucket spare = null;
-       for (long termOrd = Ordinals.MIN_ORDINAL; termOrd < globalOrdinals.getMaxOrd(); ++termOrd) {
-           final long bucketOrd = getBucketOrd(termOrd);
+       for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
+           final long bucketOrd = getBucketOrd(globalTermOrd);
            final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
            if (minDocCount > 0 && bucketDocCount == 0) {
                continue;

@@ -114,7 +134,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
            }
            spare.bucketOrd = bucketOrd;
            spare.docCount = bucketDocCount;
-           copy(globalValues.getValueByOrd(termOrd), spare.termBytes);
+           copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes);
            spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
        }

@@ -137,19 +157,13 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
        private final LongHash bucketOrds;

        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
-               long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext,
-               Aggregator parent) {
+               long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext,
+               Aggregator parent) {
            // Set maxOrd to estimatedBucketCount! To be conservative with memory.
-           super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+           super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
            bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
        }

-       @Override
-       public void setNextReader(AtomicReaderContext reader) {
-           globalValues = valuesSource.globalBytesValues();
-           globalOrdinals = globalValues.ordinals();
-       }
-
        @Override
        public void collect(int doc, long owningBucketOrdinal) throws IOException {
            final int numOrds = globalOrdinals.setDocument(doc);

@@ -191,7 +205,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr

        public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
                long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
-           super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+           super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
            this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
        }

@@ -210,7 +224,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                mapSegmentCountsToGlobalCounts();
            }

+           super.setNextReader(reader);
            globalValues = valuesSource.globalBytesValues();
            globalOrdinals = globalValues.ordinals();

            BytesValues.WithOrdinals bytesValues = valuesSource.bytesValues();
            segmentOrdinals = bytesValues.ordinals();
            if (segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) {

@@ -251,4 +267,65 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
        }
    }

+   private static final class FilteredOrdinals implements Ordinals.Docs {
+
+       private final Ordinals.Docs inner;
+       private final LongBitSet accepted;
+
+       private long currentOrd;
+       private long[] buffer = new long[0];
+       private int bufferSlot;
+
+       private FilteredOrdinals(Ordinals.Docs inner, LongBitSet accepted) {
+           this.inner = inner;
+           this.accepted = accepted;
+       }
+
+       @Override
+       public long getMaxOrd() {
+           return inner.getMaxOrd();
+       }
+
+       @Override
+       public boolean isMultiValued() {
+           return inner.isMultiValued();
+       }
+
+       @Override
+       public long getOrd(int docId) {
+           long ord = inner.getOrd(docId);
+           if (accepted.get(ord)) {
+               return currentOrd = ord;
+           } else {
+               return currentOrd = Ordinals.MISSING_ORDINAL;
+           }
+       }
+
+       @Override
+       public long nextOrd() {
+           return currentOrd = buffer[bufferSlot++];
+       }
+
+       @Override
+       public int setDocument(int docId) {
+           int numDocs = inner.setDocument(docId);
+           buffer = ArrayUtil.grow(buffer, numDocs);
+           bufferSlot = 0;
+
+           int numAcceptedOrds = 0;
+           for (int slot = 0; slot < numDocs; slot++) {
+               long ord = inner.nextOrd();
+               if (accepted.get(ord)) {
+                   buffer[numAcceptedOrds] = ord;
+                   numAcceptedOrds++;
+               }
+           }
+           return numAcceptedOrds;
+       }
+
+       @Override
+       public long currentOrd() {
+           return currentOrd;
+       }
+   }
}
TermsAggregatorFactory.java

@@ -74,10 +74,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
        Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
                AggregationContext aggregationContext, Aggregator parent) {
-           if (includeExclude != null) {
-               throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-           }
-           return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+           return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
        }

        @Override

@@ -92,10 +89,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
        Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
                AggregationContext aggregationContext, Aggregator parent) {
-           if (includeExclude != null) {
-               throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-           }
-           return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+           return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
        }

        @Override

@@ -218,8 +212,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
        // In some cases, using ordinals is just not supported: override it
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
-       } else if (includeExclude != null) {
-           execution = ExecutionMode.MAP;
        }

        final long maxOrd;
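The same now applies to the plain terms aggregation: the guards above that rejected include/exclude for the ordinal-based execution modes are gone, so a filtered request no longer has to fall back to map execution. A sketch in the style of the tests further below; index, type, field and aggregation names and the "global_ordinals" hint value are illustrative assumptions:

    // Sketch only: include/exclude regexes combined with a global-ordinals hint.
    SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(AggregationBuilders.terms("filtered_terms")
                    .field("s_value")
                    .executionHint("global_ordinals")    // assumed hint value
                    .include("val00.+")
                    .exclude("(val000|val001)"))
            .execute().actionGet();
    Terms terms = response.getAggregations().get("filtered_terms");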
IncludeExclude.java

@@ -18,13 +18,14 @@
 */
package org.elasticsearch.search.aggregations.bucket.terms.support;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.*;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;

@@ -71,6 +72,24 @@ public class IncludeExclude {
        return !exclude.reset(scratch).matches();
    }

+   /**
+    * Computes which global ordinals are accepted by this IncludeExclude instance.
+    */
+   public LongBitSet acceptedGlobalOrdinals(Ordinals.Docs globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
+       TermsEnum globalTermsEnum = valueSource.getGlobalTermsEnum();
+       LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getMaxOrd());
+       try {
+           for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
+               if (accept(term)) {
+                   acceptedGlobalOrdinals.set(globalTermsEnum.ord());
+               }
+           }
+       } catch (IOException e) {
+           throw ExceptionsHelper.convertToElastic(e);
+       }
+       return acceptedGlobalOrdinals;
+   }
+
    public static class Parser {

        private final String aggName;

@@ -152,4 +171,5 @@ public class IncludeExclude {
            return new IncludeExclude(includePattern, excludePattern);
        }
    }

}
ValuesSource.java

@@ -21,16 +21,15 @@ package org.elasticsearch.search.aggregations.support;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefArray;
-import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.*;
import org.elasticsearch.common.lucene.ReaderContextAware;
import org.elasticsearch.common.lucene.TopReaderContextAware;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.AtomicFieldData.Order;
import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues;

@@ -164,6 +163,8 @@ public abstract class ValuesSource {

        public abstract long globalMaxOrd(IndexSearcher indexSearcher);

+       public abstract TermsEnum getGlobalTermsEnum();
+
        public static class FieldData extends WithOrdinals implements ReaderContextAware {

            protected boolean needsHashes;

@@ -178,6 +179,8 @@ public abstract class ValuesSource {
            protected AtomicFieldData.WithOrdinals<?> globalAtomicFieldData;
            private BytesValues.WithOrdinals globalBytesValues;

+           private long maxOrd = -1;
+
            public FieldData(IndexFieldData.WithOrdinals<?> indexFieldData, MetaData metaData) {
                this.indexFieldData = indexFieldData;
                this.metaData = metaData;

@@ -237,18 +240,27 @@ public abstract class ValuesSource {

            @Override
            public long globalMaxOrd(IndexSearcher indexSearcher) {
+               if (maxOrd != -1) {
+                   return maxOrd;
+               }
+
                IndexReader indexReader = indexSearcher.getIndexReader();
                if (indexReader.leaves().isEmpty()) {
-                   return 0;
+                   return maxOrd = 0;
                } else {
                    AtomicReaderContext atomicReaderContext = indexReader.leaves().get(0);
                    IndexFieldData.WithOrdinals<?> globalFieldData = indexFieldData.loadGlobal(indexReader);
                    AtomicFieldData.WithOrdinals afd = globalFieldData.load(atomicReaderContext);
                    BytesValues.WithOrdinals values = afd.getBytesValues(false);
                    Ordinals.Docs ordinals = values.ordinals();
-                   return ordinals.getMaxOrd();
+                   return maxOrd = ordinals.getMaxOrd();
                }
            }

+           @Override
+           public TermsEnum getGlobalTermsEnum() {
+               return globalAtomicFieldData.getTermsEnum();
+           }
        }

    }
SignificantTermsTests.java

@@ -33,14 +33,16 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;

-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.*;

import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
+import static org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;

/**
 *

@@ -49,7 +51,13 @@ import static org.hamcrest.Matchers.equalTo;
public class SignificantTermsTests extends ElasticsearchIntegrationTest {

    public String randomExecutionHint() {
-       return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
+       return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
    }

+   public String randomExecutionHintNoOrdinals() {
+       EnumSet<SignificantTermsAggregatorFactory.ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
+       modes.remove(ExecutionMode.ORDINALS);
+       return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
+   }
+
    @Override

@@ -117,6 +125,42 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
        Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber();
        assertTrue(topCategory.equals(new Long(SNOWBOARDING_CATEGORY)));
    }

+   @Test
+   public void includeExclude() throws Exception {
+       SearchResponse response = client().prepareSearch("test")
+               .setQuery(new TermQueryBuilder("_all", "weller"))
+               .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
+                       .exclude("weller"))
+               .get();
+       assertSearchResponse(response);
+       SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
+       Set<String> terms = new HashSet<>();
+       for (Bucket topTerm : topTerms) {
+           terms.add(topTerm.getKey());
+       }
+       assertThat(terms, hasSize(6));
+       assertThat(terms.contains("jam"), is(true));
+       assertThat(terms.contains("council"), is(true));
+       assertThat(terms.contains("style"), is(true));
+       assertThat(terms.contains("paul"), is(true));
+       assertThat(terms.contains("of"), is(true));
+       assertThat(terms.contains("the"), is(true));
+
+       response = client().prepareSearch("test")
+               .setQuery(new TermQueryBuilder("_all", "weller"))
+               .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
+                       .include("weller"))
+               .get();
+       assertSearchResponse(response);
+       topTerms = response.getAggregations().get("mySignificantTerms");
+       terms = new HashSet<>();
+       for (Bucket topTerm : topTerms) {
+           terms.add(topTerm.getKey());
+       }
+       assertThat(terms, hasSize(1));
+       assertThat(terms.contains("weller"), is(true));
+   }
+
    @Test
    public void unmapped() throws Exception {

@@ -125,7 +169,7 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
                .setQuery(new TermQueryBuilder("_all", "terje"))
                .setFrom(0).setSize(60).setExplain(true)
                .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint())
-                                .minDocCount(2))
+                       .minDocCount(2))
                .execute()
                .actionGet();
        assertSearchResponse(response);
StringTermsTests.java

@@ -66,6 +66,13 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
        return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
    }

+   public static String randomExecutionHintNoOrdinals() {
+       EnumSet<ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
+       modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY);
+       modes.remove(ExecutionMode.ORDINALS);
+       return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
+   }
+
    public static String randomAllExecutionHint() {
        return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
    }

@@ -189,6 +196,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).include("val00.+"))
                .execute().actionGet();

@@ -211,6 +219,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("(val000|val001)"))
                .execute().actionGet();

@@ -233,6 +242,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+"))
                .execute().actionGet();

@@ -260,6 +270,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).include("VAL00.+", Pattern.CASE_INSENSITIVE))
                .execute().actionGet();

@@ -283,6 +294,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
                .execute().actionGet();

@@ -306,6 +318,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
+                       .executionHint(randomExecutionHintNoOrdinals())
                        .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+", 0))
                .execute().actionGet();