From e937e739f38585a01709aa2b9c471a7b40d52582 Mon Sep 17 00:00:00 2001 From: Greg Miller Date: Sat, 7 Aug 2021 07:32:50 -0700 Subject: [PATCH] LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236) --- lucene/CHANGES.txt | 2 ++ .../lucene/facet/StringValueFacetCounts.java | 2 +- .../facet/TestStringValueFacetCounts.java | 34 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2501a939c14..2605de8be51 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -433,6 +433,8 @@ Bug Fixes * LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single field. (Julie Tibshirani) +* LUCENE-10046: Counting bug fixed in StringValueFacetCounts. (Greg Miller) + Other --------------------- (No changes) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java index bf51e30dbc8..6100d5f3aba 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java @@ -375,7 +375,7 @@ public class StringValueFacetCounts extends Facets { int term = (int) segValues.nextOrd(); boolean countedDocInTotal = false; while (term != SortedSetDocValues.NO_MORE_ORDS) { - increment(term); + increment((int) ordMap.get(term)); if (countedDocInTotal == false) { totalDocCount++; } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java index 7d7d88889c8..886c3da2c67 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java @@ -140,6 +140,40 @@ public class TestStringValueFacetCounts extends FacetTestCase { IOUtils.close(searcher.getIndexReader(), dir); } + public void testSparseMultiSegmentCase() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + + Map expectedCounts = new HashMap<>(); + + // Create two segments, each with only one doc that has a large number of SSDV field values. + // This ensures "sparse" counting will occur in StringValueFacetCounts (i.e., small number + // of hits relative to the field cardinality): + Document doc = new Document(); + for (int i = 0; i < 100; i++) { + doc.add(new SortedSetDocValuesField("field", new BytesRef("foo_" + i))); + expectedCounts.put("foo_" + i, 1); + } + writer.addDocument(doc); + writer.commit(); + + doc = new Document(); + for (int i = 0; i < 100; i++) { + doc.add(new SortedSetDocValuesField("field", new BytesRef("bar_" + i))); + expectedCounts.put("bar_" + i, 1); + } + writer.addDocument(doc); + + int expectedTotalDocCount = 2; + + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + checkFacetResult(expectedCounts, expectedTotalDocCount, searcher, 10, 2, 1, 0); + + IOUtils.close(searcher.getIndexReader(), dir); + } + public void testMissingSegment() throws Exception { Directory dir = newDirectory();