LUCENE-4782 - fixed SNBC docsWithClassSize initialization for the case where the codec doesn't support Terms#getDocCount

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1448204 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tommaso Teofili 2013-02-20 15:00:53 +00:00
parent b44aa61834
commit 87dd823a0a
2 changed files with 14 additions and 2 deletions

View File

@ -38,7 +38,7 @@
<pathelement location="${codecs.jar}"/>
<path refid="test.base.classpath"/>
</path>
<target name="dist-maven" depends="dist-maven-src-java"/>
<target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
<target name="jar-core" depends="common.jar-core" />

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@ -69,7 +70,18 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
this.textFieldName = textFieldName;
this.classFieldName = classFieldName;
this.analyzer = analyzer;
this.docsWithClassSize = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
this.docsWithClassSize = countDocsWithClass();
}
/**
 * Counts the documents that have a value in the class field.
 *
 * <p>The per-field doc count stored in the index is used when it is available. If the
 * class field has no terms in the index, or the codec reports {@code -1} because it does
 * not store the statistic, the count is obtained by running a match-everything wildcard
 * query on the class field and counting its hits.
 *
 * @return the number of documents carrying the class field
 * @throws IOException if reading the index or executing the fallback search fails
 */
private int countDocsWithClass() throws IOException {
  int docCount = -1;
  // MultiFields.getTerms may return null when the field does not exist in the index;
  // guard the dereference so that case takes the query-based fallback instead of an NPE.
  // The lookup is repeated rather than stored so no additional type has to be imported.
  if (MultiFields.getTerms(this.atomicReader, this.classFieldName) != null) {
    docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
  }
  if (docCount == -1) { // field has no terms, or the codec doesn't support getDocCount
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
        totalHitCountCollector);
    docCount = totalHitCountCollector.getTotalHits();
  }
  return docCount;
}
private String[] tokenizeDoc(String doc) throws IOException {