Added include and exclude support to the terms and significant terms aggregations when they are based on global ordinals.

Closes #6000
Author: Martijn van Groningen
Date:   2014-04-30 16:21:22 +07:00
Commit: 64c43c6dc0 (parent: 7980911d96)

9 changed files with 275 additions and 64 deletions
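In practical terms, include/exclude regex filtering no longer forces the map execution mode and is no longer rejected by the global ordinals based modes. A minimal usage sketch of what this enables on the client side (index, field and aggregation names are illustrative, not part of this commit):

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;

import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

public class IncludeExcludeUsageSketch {
    // `client` is any connected Client; the helper itself is hypothetical.
    static SearchResponse search(Client client) {
        return client.prepareSearch("idx")
                .addAggregation(terms("tags")
                        .executionHint("global_ordinals") // before this commit: "cannot filter terms" / forced fallback to map
                        .field("tag")
                        .include("foo.*")
                        .exclude("foobar"))
                .addAggregation(new SignificantTermsBuilder("sig")
                        .field("description")
                        .include("well.*")) // include/exclude are new on SignificantTermsBuilder
                .get();
    }
}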

src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java

@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.search.aggregations.bucket.significant;

-import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lease.Releasables;
@@ -27,6 +26,7 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator;
+import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.internal.ContextIndexSearcher;
@@ -46,11 +46,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStringTermsAggregator {

     public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
                                                     long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount,
-                                                    AggregationContext aggregationContext, Aggregator parent,
+                                                    IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
                                                     SignificantTermsAggregatorFactory termsAggFactory) {
-        super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize,
-                minDocCount, aggregationContext, parent);
+        super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
         this.termsAggFactory = termsAggFactory;
         this.shardMinDocCount = shardMinDocCount;
     }
@@ -80,8 +79,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStringTermsAggregator {
         BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
         SignificantStringTerms.Bucket spare = null;
-        for (long termOrd = Ordinals.MIN_ORDINAL; termOrd < globalOrdinals.getMaxOrd(); ++termOrd) {
-            final long bucketOrd = getBucketOrd(termOrd);
+        for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
+            final long bucketOrd = getBucketOrd(globalTermOrd);
             final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
             if (minDocCount > 0 && bucketDocCount == 0) {
                 continue;
@@ -90,7 +89,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStringTermsAggregator {
                 spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
             }
             spare.bucketOrd = bucketOrd;
-            copy(globalValues.getValueByOrd(termOrd), spare.termBytes);
+            copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes);
             spare.subsetDf = bucketDocCount;
             spare.subsetSize = subsetSize;
             spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
@@ -136,17 +135,11 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStringTermsAggregator {

         private final LongHash bucketOrds;

-        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
-            super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggFactory);
+        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
+            super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggFactory);
             bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
         }

-        @Override
-        public void setNextReader(AtomicReaderContext reader) {
-            globalValues = valuesSource.globalBytesValues();
-            globalOrdinals = globalValues.ordinals();
-        }
-
         @Override
         public void collect(int doc, long owningBucketOrdinal) throws IOException {
             numCollectedDocs++;

src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java

@@ -87,13 +87,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                           int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
                           AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
-            if (includeExclude != null) {
-                throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-            }
             ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
             IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
             long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
-            return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
+            return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
         }

         @Override
@@ -108,10 +105,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                           int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
                           AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
-            if (includeExclude != null) {
-                throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-            }
-            return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
+            return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
         }

         @Override
@@ -201,8 +195,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         }
         if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
             execution = ExecutionMode.MAP;
-        } else if (includeExclude != null) {
-            execution = ExecutionMode.MAP;
         }
         if (execution == null) {
             if (Aggregator.hasParentBucketAggregator(parent)) {

src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java

@@ -38,6 +38,10 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTermsBuilder> {
     private int minDocCount = SignificantTermsParser.DEFAULT_MIN_DOC_COUNT;
     private int shardMinDocCount = SignificantTermsParser.DEFAULT_SHARD_MIN_DOC_COUNT;
     private String executionHint;
+    private String includePattern;
+    private int includeFlags;
+    private String excludePattern;
+    private int excludeFlags;

     public SignificantTermsBuilder(String name) {
         super(name, SignificantStringTerms.TYPE.name());
@@ -73,6 +77,50 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTermsBuilder> {
         return this;
     }

+    /**
+     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
+     * on the {@link java.util.regex.Pattern} class.
+     *
+     * @see #include(String, int)
+     */
+    public SignificantTermsBuilder include(String regex) {
+        return include(regex, 0);
+    }
+
+    /**
+     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
+     * on the {@link java.util.regex.Pattern} class.
+     *
+     * @see java.util.regex.Pattern#compile(String, int)
+     */
+    public SignificantTermsBuilder include(String regex, int flags) {
+        this.includePattern = regex;
+        this.includeFlags = flags;
+        return this;
+    }
+
+    /**
+     * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
+     * expression is based on the {@link java.util.regex.Pattern} class.
+     *
+     * @see #exclude(String, int)
+     */
+    public SignificantTermsBuilder exclude(String regex) {
+        return exclude(regex, 0);
+    }
+
+    /**
+     * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
+     * expression is based on the {@link java.util.regex.Pattern} class.
+     *
+     * @see java.util.regex.Pattern#compile(String, int)
+     */
+    public SignificantTermsBuilder exclude(String regex, int flags) {
+        this.excludePattern = regex;
+        this.excludeFlags = flags;
+        return this;
+    }
+
     @Override
     protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -94,6 +142,26 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTermsBuilder> {
         if (executionHint != null) {
             builder.field("execution_hint", executionHint);
         }
+        if (includePattern != null) {
+            if (includeFlags == 0) {
+                builder.field("include", includePattern);
+            } else {
+                builder.startObject("include")
+                        .field("pattern", includePattern)
+                        .field("flags", includeFlags)
+                        .endObject();
+            }
+        }
+        if (excludePattern != null) {
+            if (excludeFlags == 0) {
+                builder.field("exclude", excludePattern);
+            } else {
+                builder.startObject("exclude")
+                        .field("pattern", excludePattern)
+                        .field("flags", excludeFlags)
+                        .endObject();
+            }
+        }
         return builder.endObject();
     }
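For reference, the serialization rules implemented by internalXContent above: a plain string when no Pattern flags are set, otherwise an object carrying the pattern and its flags. A sketch (values illustrative):

import java.util.regex.Pattern;

import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;

public class SignificantTermsBuilderSketch {
    static SignificantTermsBuilder build() {
        return new SignificantTermsBuilder("sig")
                .field("description")
                .include("well.*")                            // serializes as: "include": "well.*"
                .exclude("WELL.*", Pattern.CASE_INSENSITIVE); // serializes as: "exclude": {"pattern": "WELL.*", "flags": 2}
    }
}

Pattern.CASE_INSENSITIVE is 0x02, hence "flags": 2 in the object form.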

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.lease.Releasables;
@@ -34,6 +35,7 @@ import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
+import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
@@ -46,13 +48,25 @@ import java.util.Arrays;
 public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {

     protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
+    private final IncludeExclude includeExclude;

     protected BytesValues.WithOrdinals globalValues;
     protected Ordinals.Docs globalOrdinals;

+    // TODO: cache the acceptedGlobalOrdinals per aggregation definition.
+    // We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
+    // If aggs with the same field but different include/exclude are defined, then the last defined one would override
+    // the first defined one.
+    // So currently the acceptedGlobalOrdinals is computed for each instance of this aggregator, which is unnecessary,
+    // especially if this agg is on a second layer or deeper.
+    private LongBitSet acceptedGlobalOrdinals;

     public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
-            long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
+            long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount,
+            IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
         super(name, factories, maxOrd, aggregationContext, parent, order, requiredSize, shardSize, minDocCount);
         this.valuesSource = valuesSource;
+        this.includeExclude = includeExclude;
     }

     protected long getBucketOrd(long termOrd) {
@@ -68,6 +82,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
     public void setNextReader(AtomicReaderContext reader) {
         globalValues = valuesSource.globalBytesValues();
         globalOrdinals = globalValues.ordinals();
+        if (acceptedGlobalOrdinals != null) {
+            globalOrdinals = new FilteredOrdinals(globalOrdinals, acceptedGlobalOrdinals);
+        } else if (includeExclude != null) {
+            acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrdinals, valuesSource);
+            globalOrdinals = new FilteredOrdinals(globalOrdinals, acceptedGlobalOrdinals);
+        }
     }

     @Override
@@ -103,8 +123,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
         }
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
         StringTerms.Bucket spare = null;
-        for (long termOrd = Ordinals.MIN_ORDINAL; termOrd < globalOrdinals.getMaxOrd(); ++termOrd) {
-            final long bucketOrd = getBucketOrd(termOrd);
+        for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
+            final long bucketOrd = getBucketOrd(globalTermOrd);
             final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
             if (minDocCount > 0 && bucketDocCount == 0) {
                 continue;
@@ -114,7 +134,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
             }
             spare.bucketOrd = bucketOrd;
             spare.docCount = bucketDocCount;
-            copy(globalValues.getValueByOrd(termOrd), spare.termBytes);
+            copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes);
             spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
         }
@@ -137,19 +157,13 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {

         private final LongHash bucketOrds;

         public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
-                long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext,
+                long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext,
                 Aggregator parent) {
             // Set maxOrd to estimatedBucketCount! To be conservative with memory.
-            super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+            super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
             bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
         }

-        @Override
-        public void setNextReader(AtomicReaderContext reader) {
-            globalValues = valuesSource.globalBytesValues();
-            globalOrdinals = globalValues.ordinals();
-        }
-
         @Override
         public void collect(int doc, long owningBucketOrdinal) throws IOException {
             final int numOrds = globalOrdinals.setDocument(doc);
@@ -191,7 +205,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
         public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
                 long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
-            super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+            super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
             this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
         }
@@ -210,7 +224,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
                 mapSegmentCountsToGlobalCounts();
             }
-            super.setNextReader(reader);
+            globalValues = valuesSource.globalBytesValues();
+            globalOrdinals = globalValues.ordinals();

             BytesValues.WithOrdinals bytesValues = valuesSource.bytesValues();
             segmentOrdinals = bytesValues.ordinals();
             if (segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) {
@@ -251,4 +267,65 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
         }
     }

+    private static final class FilteredOrdinals implements Ordinals.Docs {
+
+        private final Ordinals.Docs inner;
+        private final LongBitSet accepted;
+
+        private long currentOrd;
+        private long[] buffer = new long[0];
+        private int bufferSlot;
+
+        private FilteredOrdinals(Ordinals.Docs inner, LongBitSet accepted) {
+            this.inner = inner;
+            this.accepted = accepted;
+        }
+
+        @Override
+        public long getMaxOrd() {
+            return inner.getMaxOrd();
+        }
+
+        @Override
+        public boolean isMultiValued() {
+            return inner.isMultiValued();
+        }
+
+        @Override
+        public long getOrd(int docId) {
+            long ord = inner.getOrd(docId);
+            if (accepted.get(ord)) {
+                return currentOrd = ord;
+            } else {
+                return currentOrd = Ordinals.MISSING_ORDINAL;
+            }
+        }
+
+        @Override
+        public long nextOrd() {
+            return currentOrd = buffer[bufferSlot++];
+        }
+
+        @Override
+        public int setDocument(int docId) {
+            int numDocs = inner.setDocument(docId);
+            buffer = ArrayUtil.grow(buffer, numDocs);
+            bufferSlot = 0;
+
+            int numAcceptedOrds = 0;
+            for (int slot = 0; slot < numDocs; slot++) {
+                long ord = inner.nextOrd();
+                if (accepted.get(ord)) {
+                    buffer[numAcceptedOrds] = ord;
+                    numAcceptedOrds++;
+                }
+            }
+            return numAcceptedOrds;
+        }
+
+        @Override
+        public long currentOrd() {
+            return currentOrd;
+        }
+    }
 }
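FilteredOrdinals and IncludeExclude.acceptedGlobalOrdinals(...) together implement a two-step pattern: mark the accepted global ordinals in a LongBitSet once per aggregator, then compact each document's ordinals against that bitset during collection. A self-contained sketch of the compaction step (plain Lucene LongBitSet with illustrative data, not the ES classes):

import org.apache.lucene.util.LongBitSet;

public class FilteredOrdinalsSketch {
    public static void main(String[] args) {
        LongBitSet accepted = new LongBitSet(8); // maxOrd == 8
        // Pretend the include/exclude patterns accepted the terms at global ordinals 1, 3 and 4.
        accepted.set(1);
        accepted.set(3);
        accepted.set(4);

        // Ordinals of one document's values, as inner.setDocument(doc)/nextOrd() would produce them.
        long[] ords = {0, 1, 2, 4, 7};
        int numAccepted = 0;
        for (long ord : ords) {
            if (accepted.get(ord)) {
                ords[numAccepted++] = ord; // compact in place, as FilteredOrdinals.setDocument does
            }
        }
        // numAccepted == 2 and ords[0..numAccepted) == {1, 4}: only accepted ordinals reach collection.
        System.out.println(numAccepted);
    }
}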

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

@@ -74,10 +74,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                           long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
                           AggregationContext aggregationContext, Aggregator parent) {
-            if (includeExclude != null) {
-                throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-            }
-            return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+            return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
         }

         @Override
@@ -92,10 +89,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                           long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
                           AggregationContext aggregationContext, Aggregator parent) {
-            if (includeExclude != null) {
-                throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
-            }
-            return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+            return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
         }

         @Override
@@ -218,8 +212,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         // In some cases, using ordinals is just not supported: override it
         if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
             execution = ExecutionMode.MAP;
-        } else if (includeExclude != null) {
-            execution = ExecutionMode.MAP;
         }

         final long maxOrd;

src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java

@@ -18,13 +18,14 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms.support;

-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.*;
+import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.search.SearchParseException;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
@@ -71,6 +72,24 @@ public class IncludeExclude {
         return !exclude.reset(scratch).matches();
     }

+    /**
+     * Computes which global ordinals are accepted by this IncludeExclude instance.
+     */
+    public LongBitSet acceptedGlobalOrdinals(Ordinals.Docs globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
+        TermsEnum globalTermsEnum = valueSource.getGlobalTermsEnum();
+        LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getMaxOrd());
+        try {
+            for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
+                if (accept(term)) {
+                    acceptedGlobalOrdinals.set(globalTermsEnum.ord());
+                }
+            }
+        } catch (IOException e) {
+            throw ExceptionsHelper.convertToElastic(e);
+        }
+        return acceptedGlobalOrdinals;
+    }
+
     public static class Parser {

         private final String aggName;
@@ -152,4 +171,5 @@ public class IncludeExclude {
             return new IncludeExclude(includePattern, excludePattern);
         }
     }
 }
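The accept(...) call in acceptedGlobalOrdinals combines both patterns: a term must match the include pattern (when set) and must not match the exclude pattern (when set), as the `return !exclude.reset(scratch).matches();` line earlier in this class suggests. A standalone sketch of that predicate using java.util.regex directly (the helper and names are illustrative, not the ES implementation):

import java.util.regex.Pattern;

public class AcceptSketch {
    static boolean accept(String term, Pattern include, Pattern exclude) {
        if (include != null && !include.matcher(term).matches()) {
            return false; // include is set and the term does not match it
        }
        return exclude == null || !exclude.matcher(term).matches(); // an exclude match filters the term out
    }

    public static void main(String[] args) {
        Pattern include = Pattern.compile("val00.+");
        Pattern exclude = Pattern.compile("(val000|val001)");
        System.out.println(accept("val002", include, exclude)); // true
        System.out.println(accept("val001", include, exclude)); // false
    }
}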

src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java

@@ -21,16 +21,15 @@ package org.elasticsearch.search.aggregations.support;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefArray;
-import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.*;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.TopReaderContextAware;
 import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.AtomicFieldData.Order;
-import org.elasticsearch.index.fielddata.LongValues;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.script.SearchScript;
 import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues;
@@ -164,6 +163,8 @@ public abstract class ValuesSource {

             public abstract long globalMaxOrd(IndexSearcher indexSearcher);

+            public abstract TermsEnum getGlobalTermsEnum();
+
             public static class FieldData extends WithOrdinals implements ReaderContextAware {

                 protected boolean needsHashes;
@@ -178,6 +179,8 @@ public abstract class ValuesSource {
                 protected AtomicFieldData.WithOrdinals<?> globalAtomicFieldData;
                 private BytesValues.WithOrdinals globalBytesValues;

+                private long maxOrd = -1;
+
                 public FieldData(IndexFieldData.WithOrdinals<?> indexFieldData, MetaData metaData) {
                     this.indexFieldData = indexFieldData;
                     this.metaData = metaData;
@@ -237,18 +240,27 @@ public abstract class ValuesSource {

                 @Override
                 public long globalMaxOrd(IndexSearcher indexSearcher) {
+                    if (maxOrd != -1) {
+                        return maxOrd;
+                    }
+
                     IndexReader indexReader = indexSearcher.getIndexReader();
                     if (indexReader.leaves().isEmpty()) {
-                        return 0;
+                        return maxOrd = 0;
                     } else {
                         AtomicReaderContext atomicReaderContext = indexReader.leaves().get(0);
                         IndexFieldData.WithOrdinals<?> globalFieldData = indexFieldData.loadGlobal(indexReader);
                         AtomicFieldData.WithOrdinals afd = globalFieldData.load(atomicReaderContext);
                         BytesValues.WithOrdinals values = afd.getBytesValues(false);
                         Ordinals.Docs ordinals = values.ordinals();
-                        return ordinals.getMaxOrd();
+                        return maxOrd = ordinals.getMaxOrd();
                     }
                 }

+                @Override
+                public TermsEnum getGlobalTermsEnum() {
+                    return globalAtomicFieldData.getTermsEnum();
+                }
             }
         }

src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java

@@ -33,14 +33,16 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.junit.Test;

-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.*;

 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
+import static org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;

 /**
  *
@@ -49,7 +51,13 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {

     public String randomExecutionHint() {
-        return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
+        return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
     }

+    public String randomExecutionHintNoOrdinals() {
+        EnumSet<SignificantTermsAggregatorFactory.ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
+        modes.remove(ExecutionMode.ORDINALS);
+        return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
+    }
+
     @Override
@@ -117,6 +125,42 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
         Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber();
         assertTrue(topCategory.equals(new Long(SNOWBOARDING_CATEGORY)));
     }

+    @Test
+    public void includeExclude() throws Exception {
+        SearchResponse response = client().prepareSearch("test")
+                .setQuery(new TermQueryBuilder("_all", "weller"))
+                .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
+                        .exclude("weller"))
+                .get();
+        assertSearchResponse(response);
+        SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
+        Set<String> terms = new HashSet<>();
+        for (Bucket topTerm : topTerms) {
+            terms.add(topTerm.getKey());
+        }
+        assertThat(terms, hasSize(6));
+        assertThat(terms.contains("jam"), is(true));
+        assertThat(terms.contains("council"), is(true));
+        assertThat(terms.contains("style"), is(true));
+        assertThat(terms.contains("paul"), is(true));
+        assertThat(terms.contains("of"), is(true));
+        assertThat(terms.contains("the"), is(true));
+
+        response = client().prepareSearch("test")
+                .setQuery(new TermQueryBuilder("_all", "weller"))
+                .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
+                        .include("weller"))
+                .get();
+        assertSearchResponse(response);
+        topTerms = response.getAggregations().get("mySignificantTerms");
+        terms = new HashSet<>();
+        for (Bucket topTerm : topTerms) {
+            terms.add(topTerm.getKey());
+        }
+        assertThat(terms, hasSize(1));
+        assertThat(terms.contains("weller"), is(true));
+    }

     @Test
     public void unmapped() throws Exception {
@@ -125,7 +169,7 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
                 .setQuery(new TermQueryBuilder("_all", "terje"))
                 .setFrom(0).setSize(60).setExplain(true)
                 .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint())
-                        .minDocCount(2))
+                    .minDocCount(2))
                 .execute()
                 .actionGet();
         assertSearchResponse(response);

src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java

@@ -66,6 +66,13 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
     }

+    public static String randomExecutionHintNoOrdinals() {
+        EnumSet<ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
+        modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY);
+        modes.remove(ExecutionMode.ORDINALS);
+        return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
+    }
+
     public static String randomAllExecutionHint() {
         return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
     }
@@ -189,6 +196,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).include("val00.+"))
                 .execute().actionGet();
@@ -211,6 +219,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("(val000|val001)"))
                 .execute().actionGet();
@@ -233,6 +242,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+"))
                 .execute().actionGet();
@@ -260,6 +270,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).include("VAL00.+", Pattern.CASE_INSENSITIVE))
                 .execute().actionGet();
@@ -283,6 +294,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
                 .execute().actionGet();
@@ -306,6 +318,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         response = client().prepareSearch("idx").setTypes("high_card_type")
                 .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHintNoOrdinals())
                         .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+", 0))
                 .execute().actionGet();