mirror of https://github.com/apache/lucene.git
LUCENE-4782 - fixed SimpleNaiveBayesClassifier (SNBC) docsWithClassSize initialization for the case where the codec doesn't support Terms#getDocCount
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1448204 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b44aa61834
commit
87dd823a0a
|
@ -38,7 +38,7 @@
|
||||||
<pathelement location="${codecs.jar}"/>
|
<pathelement location="${codecs.jar}"/>
|
||||||
<path refid="test.base.classpath"/>
|
<path refid="test.base.classpath"/>
|
||||||
</path>
|
</path>
|
||||||
|
<target name="dist-maven" depends="dist-maven-src-java"/>
|
||||||
<target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
|
<target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
|
||||||
|
|
||||||
<target name="jar-core" depends="common.jar-core" />
|
<target name="jar-core" depends="common.jar-core" />
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TotalHitCountCollector;
|
import org.apache.lucene.search.TotalHitCountCollector;
|
||||||
|
import org.apache.lucene.search.WildcardQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -69,7 +70,18 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
||||||
this.textFieldName = textFieldName;
|
this.textFieldName = textFieldName;
|
||||||
this.classFieldName = classFieldName;
|
this.classFieldName = classFieldName;
|
||||||
this.analyzer = analyzer;
|
this.analyzer = analyzer;
|
||||||
this.docsWithClassSize = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
this.docsWithClassSize = countDocsWithClass();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int countDocsWithClass() throws IOException {
|
||||||
|
int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
||||||
|
if (docCount == -1) { // in case codec doesn't support getDocCount
|
||||||
|
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
|
||||||
|
indexSearcher.search(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
|
||||||
|
totalHitCountCollector);
|
||||||
|
docCount = totalHitCountCollector.getTotalHits();
|
||||||
|
}
|
||||||
|
return docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String[] tokenizeDoc(String doc) throws IOException {
|
private String[] tokenizeDoc(String doc) throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue