SignificantText aggregation had include/exclude logic back to front (#64520) (#64538)

Backport of bugfix: the SignificantText aggregation had its include/exclude logic back to front.
Added a test covering the include and exclude cases.

Closes #64519
markharwood 2020-11-03 16:43:03 +00:00 committed by GitHub
parent 4851bc7bae
commit 1fb6206fbc
2 changed files with 60 additions and 1 deletion
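For context on what "back to front" means here: a configured include/exclude filter should cause terms the filter rejects to be skipped, but the old condition skipped the terms the filter accepted, so "include" behaved like "exclude" and vice versa. Below is a minimal, self-contained sketch of the intended semantics; it is not Elasticsearch source, and accepts() is a hypothetical stand-in for IncludeExclude.accept().

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Minimal sketch of include/exclude term filtering. Not Elasticsearch source;
// accepts() is a hypothetical stand-in for IncludeExclude.accept(BytesRef).
public class IncludeExcludeSketch {

    // A term is acceptable if it is on the include list (when one is set)
    // and not on the exclude list (when one is set).
    static boolean accepts(Set<String> include, Set<String> exclude, String term) {
        if (include != null && include.contains(term) == false) {
            return false;
        }
        return exclude == null || exclude.contains(term) == false;
    }

    static List<String> collectTerms(Set<String> include, Set<String> exclude, List<String> docTerms) {
        List<String> collected = new ArrayList<>();
        for (String term : docTerms) {
            // The bug skipped terms when accepts(...) returned true, inverting the
            // filter. The fix skips a term only when the filter rejects it.
            if (accepts(include, exclude, term) == false) {
                continue;
            }
            collected.add(term);
        }
        return collected;
    }

    public static void main(String[] args) {
        List<String> docTerms = List.of("even", "duplicate", "odd");
        // Include only "duplicate" -> prints [duplicate]
        System.out.println(collectTerms(Set.of("duplicate"), null, docTerms));
        // Exclude "duplicate" -> prints [even, odd]
        System.out.println(collectTerms(null, Set.of("duplicate"), docTerms));
    }
}

Run standalone, the first call prints [duplicate] and the second prints [even, odd], mirroring the inclusive and exclusive cases exercised by the new test below.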

SignificantTextAggregatorFactory.java

@@ -256,7 +256,7 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
                 scratch.clear();
                 scratch.copyChars(termAtt);
                 BytesRef bytes = scratch.get();
-                if (includeExclude != null && includeExclude.accept(bytes)) {
+                if (includeExclude != null && false == includeExclude.accept(bytes)) {
                     continue;
                 }
                 if (inDocTerms.add(bytes) < 0) {

SignificantTextAggregatorTests.java

@@ -144,6 +144,65 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase {
         }
     }
+    /**
+     * Uses the significant text aggregation to find the keywords in text fields and include/exclude selected terms
+     */
+    public void testIncludeExcludes() throws IOException {
+        TextFieldType textFieldType = new TextFieldType("text");
+        textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
+        IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
+        indexWriterConfig.setMaxBufferedDocs(100);
+        indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            indexDocuments(w);
+            String[] incExcValues = { "duplicate" };
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                assertEquals("test expects a single segment", 1, reader.leaves().size());
+                IndexSearcher searcher = new IndexSearcher(reader);
+                // Inclusive of values
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
+                        .includeExclude(new IncludeExclude(incExcValues, null));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if (randomBoolean()) {
+                        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
+                    }
+                    // Search "even" which should have duplication
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+                    assertNull(terms.getBucketByKey("even"));
+                    assertNotNull(terms.getBucketByKey("duplicate"));
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                }
+                // Exclusive of values
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
+                        .includeExclude(new IncludeExclude(null, incExcValues));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if (randomBoolean()) {
+                        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
+                    }
+                    // Search "even" which should have duplication
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+                    assertNotNull(terms.getBucketByKey("even"));
+                    assertNull(terms.getBucketByKey("duplicate"));
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                }
+            }
+        }
+    }
     public void testFieldAlias() throws IOException {
         TextFieldType textFieldType = new TextFieldType("text");
         textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));