SignificantText aggregation had include/exclude logic back to front (#64520) (#64538)

Backport of bugfix: the SignificantText aggregation had its include/exclude logic back to front.
Added a test covering the include and exclude cases.

Closes #64519
markharwood 2020-11-03 16:43:03 +00:00 committed by GitHub
parent 4851bc7bae
commit 1fb6206fbc
2 changed files with 60 additions and 1 deletion
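For context on what "back to front" means here: a configured include/exclude filter should cause terms the filter rejects to be skipped, but the old condition skipped the terms the filter accepted, so "include" behaved like "exclude" and vice versa. Below is a minimal, self-contained sketch of the intended semantics; it is not Elasticsearch source, and accepts() is a hypothetical stand-in for IncludeExclude.accept().

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Minimal sketch of include/exclude term filtering. Not Elasticsearch source;
// accepts() is a hypothetical stand-in for IncludeExclude.accept(BytesRef).
public class IncludeExcludeSketch {

    // A term is acceptable if it is on the include list (when one is set)
    // and not on the exclude list (when one is set).
    static boolean accepts(Set<String> include, Set<String> exclude, String term) {
        if (include != null && include.contains(term) == false) {
            return false;
        }
        return exclude == null || exclude.contains(term) == false;
    }

    static List<String> collectTerms(Set<String> include, Set<String> exclude, List<String> docTerms) {
        List<String> collected = new ArrayList<>();
        for (String term : docTerms) {
            // The bug skipped terms when accepts(...) returned true, inverting the
            // filter. The fix skips a term only when the filter rejects it.
            if (accepts(include, exclude, term) == false) {
                continue;
            }
            collected.add(term);
        }
        return collected;
    }

    public static void main(String[] args) {
        List<String> docTerms = List.of("even", "duplicate", "odd");
        // Include only "duplicate" -> prints [duplicate]
        System.out.println(collectTerms(Set.of("duplicate"), null, docTerms));
        // Exclude "duplicate" -> prints [even, odd]
        System.out.println(collectTerms(null, Set.of("duplicate"), docTerms));
    }
}

Run standalone, the first call prints [duplicate] and the second prints [even, odd], mirroring the inclusive and exclusive cases exercised by the new test below.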

SignificantTextAggregatorFactory.java

@@ -256,7 +256,7 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
                 scratch.clear();
                 scratch.copyChars(termAtt);
                 BytesRef bytes = scratch.get();
-                if (includeExclude != null && includeExclude.accept(bytes)) {
+                if (includeExclude != null && false == includeExclude.accept(bytes)) {
                     continue;
                 }
                 if (inDocTerms.add(bytes) < 0) {

SignificantTextAggregatorTests.java

@@ -144,6 +144,65 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase {
         }
     }
+    /**
+     * Uses the significant text aggregation to find the keywords in text fields and include/exclude selected terms
+     */
+    public void testIncludeExcludes() throws IOException {
+        TextFieldType textFieldType = new TextFieldType("text");
+        textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
+        IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
+        indexWriterConfig.setMaxBufferedDocs(100);
+        indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            indexDocuments(w);
+            String[] incExcValues = { "duplicate" };
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                assertEquals("test expects a single segment", 1, reader.leaves().size());
+                IndexSearcher searcher = new IndexSearcher(reader);
+                // Inclusive of values
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
+                        .includeExclude(new IncludeExclude(incExcValues, null));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if (randomBoolean()) {
+                        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
+                    }
+                    // Search "even" which should have duplication
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+                    assertNull(terms.getBucketByKey("even"));
+                    assertNotNull(terms.getBucketByKey("duplicate"));
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                }
+                // Exclusive of values
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
+                        .includeExclude(new IncludeExclude(null, incExcValues));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if (randomBoolean()) {
+                        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
+                    }
+                    // Search "even" which should have duplication
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+                    assertNotNull(terms.getBucketByKey("even"));
+                    assertNull(terms.getBucketByKey("duplicate"));
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                }
+            }
+        }
+    }
     public void testFieldAlias() throws IOException {
         TextFieldType textFieldType = new TextFieldType("text");
         textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));