From 64c43c6dc06855ad001cff4f6e5a358efa5567b9 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 30 Apr 2014 16:21:22 +0700 Subject: [PATCH] Made the include and exclude support for terms and significant terms aggregations based on global ordinals. Closes #6000 --- ...balOrdinalsSignificantTermsAggregator.java | 23 ++-- .../SignificantTermsAggregatorFactory.java | 12 +- .../significant/SignificantTermsBuilder.java | 68 +++++++++++ .../GlobalOrdinalsStringTermsAggregator.java | 107 +++++++++++++++--- .../bucket/terms/TermsAggregatorFactory.java | 12 +- .../bucket/terms/support/IncludeExclude.java | 28 ++++- .../aggregations/support/ValuesSource.java | 24 +++- .../bucket/SignificantTermsTests.java | 52 ++++++++- .../aggregations/bucket/StringTermsTests.java | 13 +++ 9 files changed, 275 insertions(+), 64 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index a23b99149cb..22f3bf3e56f 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.significant; -import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lease.Releasables; @@ -27,6 +26,7 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.ContextIndexSearcher; @@ -46,11 +46,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, - AggregationContext aggregationContext, Aggregator parent, + IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, - minDocCount, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); this.termsAggFactory = termsAggFactory; this.shardMinDocCount = shardMinDocCount; } @@ -80,8 +79,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size); SignificantStringTerms.Bucket spare = null; - for (long termOrd = Ordinals.MIN_ORDINAL; termOrd < globalOrdinals.getMaxOrd(); ++termOrd) { - final long bucketOrd = getBucketOrd(termOrd); 
+ for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) { + final long bucketOrd = getBucketOrd(globalTermOrd); final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); if (minDocCount > 0 && bucketDocCount == 0) { continue; @@ -90,7 +89,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null); } spare.bucketOrd = bucketOrd; - copy(globalValues.getValueByOrd(termOrd), spare.termBytes); + copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes); spare.subsetDf = bucketDocCount; spare.subsetSize = subsetSize; spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes); @@ -136,17 +135,11 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri private final LongHash bucketOrds; - public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggFactory); + public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { + super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggFactory); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } - @Override - public void setNextReader(AtomicReaderContext reader) { - globalValues = valuesSource.globalBytesValues(); - globalOrdinals = globalValues.ordinals(); - } - @Override public void collect(int doc, long owningBucketOrdinal) throws IOException { numCollectedDocs++; diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 132bbd357e6..1d7a1c84214 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -87,13 +87,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { - if (includeExclude != null) { - throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); - } ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; 
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher(); long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher); - return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -108,10 +105,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { - if (includeExclude != null) { - throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); - } - return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -201,8 +195,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) { execution = ExecutionMode.MAP; - } else if (includeExclude != null) { - execution = ExecutionMode.MAP; } if (execution == null) { if (Aggregator.hasParentBucketAggregator(parent)) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index 4b6402facd5..4e7eb104573 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -38,6 +38,10 @@ public class SignificantTermsBuilder extends AggregationBuilder 0 && bucketDocCount == 0) { continue; @@ -114,7 +134,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr } spare.bucketOrd = bucketOrd; spare.docCount = bucketDocCount; - copy(globalValues.getValueByOrd(termOrd), spare.termBytes); + copy(globalValues.getValueByOrd(globalTermOrd), spare.termBytes); spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare); } @@ -137,19 +157,13 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final LongHash bucketOrds; public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext 
aggregationContext, - Aggregator parent) { + long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, + Aggregator parent) { // Set maxOrd to estimatedBucketCount! To be conservative with memory. - super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } - @Override - public void setNextReader(AtomicReaderContext reader) { - globalValues = valuesSource.globalBytesValues(); - globalOrdinals = globalValues.ordinals(); - } - @Override public void collect(int doc, long owningBucketOrdinal) throws IOException { final int numOrds = globalOrdinals.setDocument(doc); @@ -191,7 +205,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { - super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent); this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true); } @@ -210,7 +224,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr mapSegmentCountsToGlobalCounts(); } - super.setNextReader(reader); + globalValues = valuesSource.globalBytesValues(); + globalOrdinals = globalValues.ordinals(); + BytesValues.WithOrdinals bytesValues = valuesSource.bytesValues(); segmentOrdinals = bytesValues.ordinals(); if (segmentOrdinals.getMaxOrd() != globalOrdinals.getMaxOrd()) { @@ -251,4 +267,65 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr } } + private static final class FilteredOrdinals implements Ordinals.Docs { + + private final Ordinals.Docs inner; + private final LongBitSet accepted; + + private long currentOrd; + private long[] buffer = new long[0]; + private int bufferSlot; + + private FilteredOrdinals(Ordinals.Docs inner, LongBitSet accepted) { + this.inner = inner; + this.accepted = accepted; + } + + @Override + public long getMaxOrd() { + return inner.getMaxOrd(); + } + + @Override + public boolean isMultiValued() { + return inner.isMultiValued(); + } + + @Override + public long getOrd(int docId) { + long ord = inner.getOrd(docId); + if (accepted.get(ord)) { + return currentOrd = ord; + } else { + return currentOrd = Ordinals.MISSING_ORDINAL; + } + } + + @Override + public long nextOrd() { + return currentOrd = buffer[bufferSlot++]; + } + + @Override + public int setDocument(int docId) { + int numDocs = inner.setDocument(docId); + buffer = ArrayUtil.grow(buffer, numDocs); + bufferSlot = 0; + + int numAcceptedOrds = 0; + for (int slot = 0; slot < numDocs; slot++) { + long ord = inner.nextOrd(); + if (accepted.get(ord)) { + buffer[numAcceptedOrds] = ord; + numAcceptedOrds++; + } + } + return numAcceptedOrds; + } + + @Override + public long currentOrd() 
{ + return currentOrd; + } + } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 2a0834e3918..d97755a6526 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -74,10 +74,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - if (includeExclude != null) { - throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); - } - return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); } @Override @@ -92,10 +89,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - if (includeExclude != null) { - throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); - } - return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); } @Override @@ -218,8 +212,6 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { // In some cases, using ordinals is just not supported: override it if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) { execution = ExecutionMode.MAP; - } else if (includeExclude != null) { - execution = ExecutionMode.MAP; } final long maxOrd; diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 27c012c6a58..889fec8c43e 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -18,13 +18,14 @@ */ package org.elasticsearch.search.aggregations.bucket.terms.support; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.UnicodeUtil; +import 
org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.*; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.SearchParseException; +import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -71,6 +72,24 @@ public class IncludeExclude { return !exclude.reset(scratch).matches(); } + /** + * Computes which global ordinals are accepted by this IncludeExclude instance. + */ + public LongBitSet acceptedGlobalOrdinals(Ordinals.Docs globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) { + TermsEnum globalTermsEnum = valueSource.getGlobalTermsEnum(); + LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getMaxOrd()); + try { + for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) { + if (accept(term)) { + acceptedGlobalOrdinals.set(globalTermsEnum.ord()); + } + } + } catch (IOException e) { + throw ExceptionsHelper.convertToElastic(e); + } + return acceptedGlobalOrdinals; + } + public static class Parser { private final String aggName; @@ -152,4 +171,5 @@ public class IncludeExclude { return new IncludeExclude(includePattern, excludePattern); } } + } diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 4d3373da84d..32a78093d6b 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -21,16 +21,15 @@ package org.elasticsearch.search.aggregations.support; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefArray; -import org.apache.lucene.util.Counter; +import org.apache.lucene.util.*; import org.elasticsearch.common.lucene.ReaderContextAware; import org.elasticsearch.common.lucene.TopReaderContextAware; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.AtomicFieldData.Order; +import org.elasticsearch.index.fielddata.LongValues; import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues; @@ -164,6 +163,8 @@ public abstract class ValuesSource { public abstract long globalMaxOrd(IndexSearcher indexSearcher); + public abstract TermsEnum getGlobalTermsEnum(); + public static class FieldData extends WithOrdinals implements ReaderContextAware { protected boolean needsHashes; @@ -178,6 +179,8 @@ public abstract class ValuesSource { protected AtomicFieldData.WithOrdinals globalAtomicFieldData; private BytesValues.WithOrdinals globalBytesValues; + private long maxOrd = -1; + public FieldData(IndexFieldData.WithOrdinals indexFieldData, MetaData metaData) { this.indexFieldData = indexFieldData; this.metaData = 
metaData; @@ -237,18 +240,27 @@ public abstract class ValuesSource { @Override public long globalMaxOrd(IndexSearcher indexSearcher) { + if (maxOrd != -1) { + return maxOrd; + } + IndexReader indexReader = indexSearcher.getIndexReader(); if (indexReader.leaves().isEmpty()) { - return 0; + return maxOrd = 0; } else { AtomicReaderContext atomicReaderContext = indexReader.leaves().get(0); IndexFieldData.WithOrdinals globalFieldData = indexFieldData.loadGlobal(indexReader); AtomicFieldData.WithOrdinals afd = globalFieldData.load(atomicReaderContext); BytesValues.WithOrdinals values = afd.getBytesValues(false); Ordinals.Docs ordinals = values.ordinals(); - return ordinals.getMaxOrd(); + return maxOrd = ordinals.getMaxOrd(); } } + + @Override + public TermsEnum getGlobalTermsEnum() { + return globalAtomicFieldData.getTermsEnum(); + } } } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java index 358438ff019..a2365cbec48 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -33,14 +33,16 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; -import java.util.HashMap; -import java.util.HashSet; +import java.util.*; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; /** * @@ -49,7 +51,13 @@ import static org.hamcrest.Matchers.equalTo; public class SignificantTermsTests extends ElasticsearchIntegrationTest { public String randomExecutionHint() { - return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString(); + return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString(); + } + + public String randomExecutionHintNoOrdinals() { + EnumSet modes = EnumSet.allOf(ExecutionMode.class); + modes.remove(ExecutionMode.ORDINALS); + return randomBoolean() ? 
null : randomFrom(modes.toArray()).toString(); } @Override @@ -117,6 +125,42 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest { Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber(); assertTrue(topCategory.equals(new Long(SNOWBOARDING_CATEGORY))); } + + @Test + public void includeExclude() throws Exception { + SearchResponse response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals()) + .exclude("weller")) + .get(); + assertSearchResponse(response); + SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms"); + Set terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(6)); + assertThat(terms.contains("jam"), is(true)); + assertThat(terms.contains("council"), is(true)); + assertThat(terms.contains("style"), is(true)); + assertThat(terms.contains("paul"), is(true)); + assertThat(terms.contains("of"), is(true)); + assertThat(terms.contains("the"), is(true)); + + response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals()) + .include("weller")) + .get(); + assertSearchResponse(response); + topTerms = response.getAggregations().get("mySignificantTerms"); + terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(1)); + assertThat(terms.contains("weller"), is(true)); + } @Test public void unmapped() throws Exception { @@ -125,7 +169,7 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest { .setQuery(new TermQueryBuilder("_all", "terje")) .setFrom(0).setSize(60).setExplain(true) .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint()) - .minDocCount(2)) + .minDocCount(2)) .execute() .actionGet(); assertSearchResponse(response); diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java index 74740cfd36c..a8d8d6e0e5d 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java @@ -66,6 +66,13 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { return randomBoolean() ? null : randomFrom(modes.toArray()).toString(); } + public static String randomExecutionHintNoOrdinals() { + EnumSet modes = EnumSet.allOf(ExecutionMode.class); + modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY); + modes.remove(ExecutionMode.ORDINALS); + return randomBoolean() ? null : randomFrom(modes.toArray()).toString(); + } + public static String randomAllExecutionHint() { return randomBoolean() ? 
null : randomFrom(ExecutionMode.values()).toString(); } @@ -189,6 +196,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).include("val00.+")) .execute().actionGet(); @@ -211,6 +219,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("(val000|val001)")) .execute().actionGet(); @@ -233,6 +242,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+")) .execute().actionGet(); @@ -260,6 +270,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).include("VAL00.+", Pattern.CASE_INSENSITIVE)) .execute().actionGet(); @@ -283,6 +294,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS)) .execute().actionGet(); @@ -306,6 +318,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest { response = client().prepareSearch("idx").setTypes("high_card_type") .addAggregation(terms("terms") + .executionHint(randomExecutionHintNoOrdinals()) .field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+", 0)) .execute().actionGet();
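
Note on how the new pieces combine at segment-change time: the include/exclude patterns are turned into a bitset of accepted global ordinals by IncludeExclude.acceptedGlobalOrdinals(), and the aggregator then iterates a FilteredOrdinals view instead of the raw ordinals, while the LowCardinality variant keeps passing null and stays unfiltered. Below is a minimal sketch of that wiring inside GlobalOrdinalsStringTermsAggregator, assuming valuesSource, includeExclude, globalValues and globalOrdinals fields as suggested by the constructors and the removed setNextReader overrides above; it is an illustration of how the visible pieces fit, not a verbatim excerpt of the patch.

    // Sketch only (field names assumed from the constructor parameters above).
    @Override
    public void setNextReader(AtomicReaderContext reader) {
        globalValues = valuesSource.globalBytesValues();
        globalOrdinals = globalValues.ordinals();
        if (includeExclude != null) {
            // Walk the global terms enum once and mark every ordinal whose term
            // passes the include/exclude patterns.
            LongBitSet acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrdinals, valuesSource);
            // Hide all other ordinals from collect()/buildAggregation() behind the
            // FilteredOrdinals wrapper defined earlier in this class.
            globalOrdinals = new FilteredOrdinals(globalOrdinals, acceptedGlobalOrdinals);
        }
    }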
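
From the request side, the change means include/exclude can now be combined with the global ordinals based execution modes instead of being rejected or silently falling back to map (the removed checks above). The following usage sketch is modelled on the tests in this patch; the index and field names are placeholders, and the "global_ordinals"/"global_ordinals_hash" hint strings are assumed to match the ExecutionMode constants, so treat the snippet as illustrative rather than as part of the change.

    // Hedged usage sketch; placeholder names and assumed hint strings are marked in comments.
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.index.query.TermQueryBuilder;
    import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;
    import org.elasticsearch.test.ElasticsearchIntegrationTest;
    import org.junit.Test;

    import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
    import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;

    public class IncludeExcludeGlobalOrdinalsUsageTest extends ElasticsearchIntegrationTest {

        @Test
        public void includeExcludeOnGlobalOrdinals() {
            // terms aggregation: keep only terms matching the regex while explicitly
            // running on global ordinals (rejected before this patch).
            SearchResponse response = client().prepareSearch("idx")              // placeholder index
                    .addAggregation(terms("terms")
                            .executionHint("global_ordinals")                    // assumed hint string
                            .field("str_field")                                  // placeholder field
                            .include("val00.+"))
                    .get();
            assertSearchResponse(response);

            // significant_terms: the include()/exclude() setters added to
            // SignificantTermsBuilder in this patch, on the hashed global ordinals mode.
            response = client().prepareSearch("test")                            // placeholder index
                    .setQuery(new TermQueryBuilder("_all", "weller"))
                    .addAggregation(new SignificantTermsBuilder("mySignificantTerms")
                            .field("description")                               // placeholder field
                            .executionHint("global_ordinals_hash")              // assumed hint string
                            .exclude("weller"))
                    .get();
            assertSearchResponse(response);
        }
    }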