From af2b903f65e4451838fb3e93511329acec30a2a1 Mon Sep 17 00:00:00 2001
From: David Smiley
Date: Wed, 15 Nov 2017 08:57:47 -0500
Subject: [PATCH] LUCENE-8040: optimize IndexSearcher.collectionStatistics

---
 lucene/CHANGES.txt                            |  6 +++++
 .../apache/lucene/search/IndexSearcher.java   | 26 +++++++++----------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 784677e01cd..ff838dd9769 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,12 @@ Improvements
   Add missing range checks for similarity parameters.
   Improve BM25 and ClassicSimilarity's explanations. (Robert Muir)
 
+Optimizations
+
+* LUCENE-8040: Optimize IndexSearcher.collectionStatistics, avoiding MultiFields/MultiTerms
+  (David Smiley, Robert Muir)
+
+
 ======================= Lucene 7.2.0 =======================
 
 API Changes
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index e2283eadff4..e35e70bbd2d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -37,7 +37,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
@@ -778,21 +777,22 @@ public class IndexSearcher {
    * @lucene.experimental
    */
   public CollectionStatistics collectionStatistics(String field) throws IOException {
-    final int docCount;
-    final long sumTotalTermFreq;
-    final long sumDocFreq;
-
     assert field != null;
-
-    Terms terms = MultiFields.getTerms(reader, field);
-    if (terms == null) {
+    long docCount = 0;
+    long sumTotalTermFreq = 0;
+    long sumDocFreq = 0;
+    for (LeafReaderContext leaf : reader.leaves()) {
+      final Terms terms = leaf.reader().terms(field);
+      if (terms == null) {
+        continue;
+      }
+      docCount += terms.getDocCount();
+      sumTotalTermFreq += terms.getSumTotalTermFreq();
+      sumDocFreq += terms.getSumDocFreq();
+    }
+    if (docCount == 0) {
       return null;
     }
-
-    docCount = terms.getDocCount();
-    sumTotalTermFreq = terms.getSumTotalTermFreq();
-    sumDocFreq = terms.getSumDocFreq();
-
     return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
   }
 }