Fix handling of empty keyword in terms aggregation (#34457)

Empty values on keyword fields were incorrectly filtered out by the `map`
execution mode of the `terms` aggregation. This commit restores them as valid
buckets by applying the duplicate-value check only from the second value
onward (`i > 0`).

Closes #34434
This commit is contained in:
Jim Ferenczi 2018-10-15 19:33:52 +01:00 committed by GitHub
parent 8e133ab451
commit 67577fca56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 7 deletions

View File

@ -93,7 +93,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
if (includeExclude != null && !includeExclude.accept(bytes)) {
continue;
}
if (previous.get().equals(bytes)) {
if (i > 0 && previous.get().equals(bytes)) {
continue;
}
long bucketOrdinal = bucketOrds.add(bytes);

View File

@ -158,6 +158,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("")));
document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
indexWriter.addDocument(document);
@ -165,6 +166,9 @@ public class TermsAggregatorTests extends AggregatorTestCase {
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("")));
indexWriter.addDocument(document);
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
@ -181,15 +185,17 @@ public class TermsAggregatorTests extends AggregatorTestCase {
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
Terms result = (Terms) aggregator.buildAggregation(0L);
assertEquals(4, result.getBuckets().size());
assertEquals("a", result.getBuckets().get(0).getKeyAsString());
assertEquals(5, result.getBuckets().size());
assertEquals("", result.getBuckets().get(0).getKeyAsString());
assertEquals(2L, result.getBuckets().get(0).getDocCount());
assertEquals("b", result.getBuckets().get(1).getKeyAsString());
assertEquals("a", result.getBuckets().get(1).getKeyAsString());
assertEquals(2L, result.getBuckets().get(1).getDocCount());
assertEquals("c", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
assertEquals("d", result.getBuckets().get(3).getKeyAsString());
assertEquals("b", result.getBuckets().get(2).getKeyAsString());
assertEquals(2L, result.getBuckets().get(2).getDocCount());
assertEquals("c", result.getBuckets().get(3).getKeyAsString());
assertEquals(1L, result.getBuckets().get(3).getDocCount());
assertEquals("d", result.getBuckets().get(4).getKeyAsString());
assertEquals(1L, result.getBuckets().get(4).getDocCount());
}
}
}