mirror of https://github.com/apache/lucene.git
LUCENE-4782 - fixed SNBC docsWithClassSize initialization for the case where the codec doesn't support Terms#getDocCount
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1448204 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b44aa61834
commit
87dd823a0a
|
@ -38,7 +38,7 @@
|
|||
<pathelement location="${codecs.jar}"/>
|
||||
<path refid="test.base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="dist-maven" depends="dist-maven-src-java"/>
|
||||
<target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
|
||||
|
||||
<target name="jar-core" depends="common.jar-core" />
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -69,7 +70,18 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
|||
this.textFieldName = textFieldName;
|
||||
this.classFieldName = classFieldName;
|
||||
this.analyzer = analyzer;
|
||||
this.docsWithClassSize = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
||||
this.docsWithClassSize = countDocsWithClass();
|
||||
}
|
||||
|
||||
private int countDocsWithClass() throws IOException {
|
||||
int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
||||
if (docCount == -1) { // in case codec doesn't support getDocCount
|
||||
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
|
||||
indexSearcher.search(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
|
||||
totalHitCountCollector);
|
||||
docCount = totalHitCountCollector.getTotalHits();
|
||||
}
|
||||
return docCount;
|
||||
}
|
||||
|
||||
private String[] tokenizeDoc(String doc) throws IOException {
|
||||
|
|
Loading…
Reference in New Issue