Backport bugfix. SignificantText aggregation had include/exclude logic back to front. Added test. Closes #64519
This commit is contained in:
parent
4851bc7bae
commit
1fb6206fbc
|
@ -256,7 +256,7 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
|
|||
scratch.clear();
|
||||
scratch.copyChars(termAtt);
|
||||
BytesRef bytes = scratch.get();
|
||||
if (includeExclude != null && includeExclude.accept(bytes)) {
|
||||
if (includeExclude != null && false == includeExclude.accept(bytes)) {
|
||||
continue;
|
||||
}
|
||||
if (inDocTerms.add(bytes) < 0) {
|
||||
|
|
|
@ -144,6 +144,65 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the significant text aggregation to find the keywords in text fields and include/exclude selected terms
|
||||
*/
|
||||
public void testIncludeExcludes() throws IOException {
|
||||
TextFieldType textFieldType = new TextFieldType("text");
|
||||
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
|
||||
|
||||
IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
|
||||
indexWriterConfig.setMaxBufferedDocs(100);
|
||||
indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
|
||||
try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
|
||||
indexDocuments(w);
|
||||
|
||||
String [] incExcValues = {"duplicate"};
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(w)) {
|
||||
assertEquals("test expects a single segment", 1, reader.leaves().size());
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
// Inclusive of values
|
||||
{
|
||||
SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
|
||||
includeExclude(new IncludeExclude(incExcValues, null));
|
||||
SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
|
||||
.subAggregation(sigAgg);
|
||||
if(randomBoolean()){
|
||||
sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
|
||||
}
|
||||
// Search "even" which should have duplication
|
||||
InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
|
||||
SignificantTerms terms = sampler.getAggregations().get("sig_text");
|
||||
|
||||
assertNull(terms.getBucketByKey("even"));
|
||||
assertNotNull(terms.getBucketByKey("duplicate"));
|
||||
assertTrue(AggregationInspectionHelper.hasValue(sampler));
|
||||
|
||||
}
|
||||
// Exclusive of values
|
||||
{
|
||||
SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
|
||||
includeExclude(new IncludeExclude(null, incExcValues));
|
||||
SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
|
||||
.subAggregation(sigAgg);
|
||||
if(randomBoolean()){
|
||||
sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
|
||||
}
|
||||
// Search "even" which should have duplication
|
||||
InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
|
||||
SignificantTerms terms = sampler.getAggregations().get("sig_text");
|
||||
|
||||
assertNotNull(terms.getBucketByKey("even"));
|
||||
assertNull(terms.getBucketByKey("duplicate"));
|
||||
assertTrue(AggregationInspectionHelper.hasValue(sampler));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testFieldAlias() throws IOException {
|
||||
TextFieldType textFieldType = new TextFieldType("text");
|
||||
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
|
||||
|
|
Loading…
Reference in New Issue