From 11006fba59d1d4cad28c7c4c917c9288f5206fc5 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 25 Jan 2022 16:11:19 +0100 Subject: [PATCH] LUCENE-10002: Replace simple usages of TotalHitCountCollector with IndexSearcher#count (#612) When only the number of hits is collected, IndexSearcher#search(Query, Collector) is commonly used, which does not use the executor that may have been set on the searcher. Calling `IndexSearcher#count(Query)` makes the code more concise and is also more correct as it honours the executor that's been set on the searcher instance. Co-authored-by: Adrien Grand --- .../classification/CachingNaiveBayesClassifier.java | 5 +---- .../classification/SimpleNaiveBayesClassifier.java | 9 ++------- .../document/SimpleNaiveBayesDocumentClassifier.java | 5 +---- .../org/apache/lucene/search/TotalHitCountCollector.java | 7 ++++++- .../apache/lucene/search/TestFuzzyTermOnShortTerms.java | 5 ++--- .../test/org/apache/lucene/search/TestLRUQueryCache.java | 2 +- .../test/org/apache/lucene/search/join/TestJoinUtil.java | 5 ++--- .../lucene/spatial/prefix/TestHeatmapFacetCounter.java | 8 +++----- 8 files changed, 18 insertions(+), 28 deletions(-) diff --git a/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java index db8c6dbba3d..deff4202167 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java @@ -32,7 +32,6 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.util.BytesRef; /** @@ -179,10 +178,8 @@ public class CachingNaiveBayesClassifier 
extends SimpleNaiveBayesClassifier { if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } - TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); - indexSearcher.search(booleanQuery.build(), totalHitCountCollector); - int ret = totalHitCountCollector.getTotalHits(); + int ret = indexSearcher.count(booleanQuery.build()); if (ret != 0) { searched.put(cclass, ret); } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index c1b56014950..668c6499f41 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -35,7 +35,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; @@ -169,7 +168,6 @@ public class SimpleNaiveBayesClassifier implements Classifier { Terms terms = MultiTerms.getTerms(this.indexReader, this.classFieldName); int docCount; if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount - TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add( new BooleanClause( @@ -179,8 +177,7 @@ public class SimpleNaiveBayesClassifier implements Classifier { if (query != null) { q.add(query, BooleanClause.Occur.MUST); } - indexSearcher.search(q.build(), classQueryCountCollector); - docCount = classQueryCountCollector.getTotalHits(); + docCount = indexSearcher.count(q.build()); } else { docCount = terms.getDocCount(); } @@ -276,9 +273,7 @@ 
public class SimpleNaiveBayesClassifier implements Classifier { if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } - TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); - indexSearcher.search(booleanQuery.build(), totalHitCountCollector); - return totalHitCountCollector.getTotalHits(); + return indexSearcher.count(booleanQuery.build()); } private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException { diff --git a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java index 8110c96dd50..d8ff07d8fd7 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java @@ -40,7 +40,6 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.util.BytesRef; /** @@ -263,9 +262,7 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } - TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); - indexSearcher.search(booleanQuery.build(), totalHitCountCollector); - return totalHitCountCollector.getTotalHits(); + return indexSearcher.count(booleanQuery.build()); } private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java index 4d283eadce4..9d9ad4149b0 
100644 --- a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java @@ -16,7 +16,12 @@ */ package org.apache.lucene.search; -/** Just counts the total number of hits. */ +/** + * Just counts the total number of hits. For cases when this is the only collector used, {@link + * IndexSearcher#count(Query)} should be called instead of {@link IndexSearcher#search(Query, + * Collector)} as the former is faster whenever the count can be returned directly from the index + * statistics. + */ public class TotalHitCountCollector extends SimpleCollector { private int totalHits; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyTermOnShortTerms.java b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyTermOnShortTerms.java index 73b755bf66a..c2261acfd69 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyTermOnShortTerms.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyTermOnShortTerms.java @@ -62,9 +62,8 @@ public class TestFuzzyTermOnShortTerms extends LuceneTestCase { Directory d = getDirectory(analyzer, docs); IndexReader r = DirectoryReader.open(d); IndexSearcher s = new IndexSearcher(r); - TotalHitCountCollector c = new TotalHitCountCollector(); - s.search(q, c); - assertEquals(q.toString(), expected, c.getTotalHits()); + int totalHits = s.count(q); + assertEquals(q.toString(), expected, totalHits); r.close(); d.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 18953ad6b7f..c7223a4a6d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -1169,7 +1169,7 @@ public class TestLRUQueryCache extends LuceneTestCase { searcher.setQueryCachingPolicy(ALWAYS_CACHE); BadQuery query = new BadQuery(); - 
searcher.count(query); + searcher.search(query, new TotalHitCountCollector()); query.i[0] += 1; // change the hashCode! try { diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index cea1862552d..ef6e834a93c 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -681,15 +681,14 @@ public class TestJoinUtil extends LuceneTestCase { Query joinQuery = JoinUtil.createJoinQuery( "join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max); - TotalHitCountCollector collector = new TotalHitCountCollector(); - searcher.search(joinQuery, collector); + int totalHits = searcher.count(joinQuery); int expectedCount = 0; for (int numChildDocs : childDocsPerParent) { if (numChildDocs >= min && numChildDocs <= max) { expectedCount++; } } - assertEquals(expectedCount, collector.getTotalHits()); + assertEquals(expectedCount, totalHits); } searcher.getIndexReader().close(); dir.close(); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/TestHeatmapFacetCounter.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/TestHeatmapFacetCounter.java index 8a2f72d0f63..b3080d6a4ec 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/TestHeatmapFacetCounter.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/TestHeatmapFacetCounter.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.spatial.StrategyTestCase; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; @@ -282,13 +281,12 @@ public class TestHeatmapFacetCounter extends StrategyTestCase { 
Query filter = new IntersectsPrefixTreeQuery( pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels()); - final TotalHitCountCollector collector = new TotalHitCountCollector(); - indexSearcher.search(filter, collector); + int totalHits = indexSearcher.count(filter); cellsValidated++; - if (collector.getTotalHits() > 0) { + if (totalHits > 0) { cellValidatedNonZero++; } - return collector.getTotalHits(); + return totalHits; } private Shape randomIndexedShape() {