mirror of https://github.com/apache/lucene.git
LUCENE-8040: optimize IndexSearcher.collectionStatistics
This commit is contained in:
parent
183571c085
commit
af2b903f65
|
@ -41,6 +41,12 @@ Improvements
|
|||
Add missing range checks for similarity parameters.
|
||||
Improve BM25 and ClassicSimilarity's explanations. (Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-8040: Optimize IndexSearcher.collectionStatistics, avoiding MultiFields/MultiTerms
|
||||
(David Smiley, Robert Muir)
|
||||
|
||||
|
||||
======================= Lucene 7.2.0 =======================
|
||||
|
||||
API Changes
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -778,21 +777,22 @@ public class IndexSearcher {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public CollectionStatistics collectionStatistics(String field) throws IOException {
|
||||
final int docCount;
|
||||
final long sumTotalTermFreq;
|
||||
final long sumDocFreq;
|
||||
|
||||
assert field != null;
|
||||
|
||||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
long docCount = 0;
|
||||
long sumTotalTermFreq = 0;
|
||||
long sumDocFreq = 0;
|
||||
for (LeafReaderContext leaf : reader.leaves()) {
|
||||
final Terms terms = leaf.reader().terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
docCount += terms.getDocCount();
|
||||
sumTotalTermFreq += terms.getSumTotalTermFreq();
|
||||
sumDocFreq += terms.getSumDocFreq();
|
||||
}
|
||||
if (docCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
docCount = terms.getDocCount();
|
||||
sumTotalTermFreq = terms.getSumTotalTermFreq();
|
||||
sumDocFreq = terms.getSumDocFreq();
|
||||
|
||||
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue