Let CachingNBClassifier be constructed from IndexReaders

This commit is contained in:
Tommaso Teofili 2016-06-21 13:43:54 +02:00
parent fcf4389d82
commit daa26090a3
1 changed file with 4 additions and 6 deletions

View File

@@ -24,7 +24,7 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
@@ -36,8 +36,6 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.util.BytesRef;
/**
* A simplistic Lucene based NaiveBayes classifier, with caching feature, see
* <code>http://en.wikipedia.org/wiki/Naive_Bayes_classifier</code>
@@ -61,15 +59,15 @@ public class CachingNaiveBayesClassifier extends SimpleNaiveBayesClassifier {
* Creates a new NaiveBayes classifier with inside caching. If you want less memory usage you could call
* {@link #reInitCache(int, boolean) reInitCache()}.
*
* @param leafReader the reader on the index to be used for classification
* @param indexReader the reader on the index to be used for classification
* @param analyzer an {@link Analyzer} used to analyze unseen text
* @param query a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
* if all the indexed docs should be used
* @param classFieldName the name of the field used as the output for the classifier
* @param textFieldNames the name of the fields used as the inputs for the classifier
*/
public CachingNaiveBayesClassifier(LeafReader leafReader, Analyzer analyzer, Query query, String classFieldName, String... textFieldNames) {
super(leafReader, analyzer, query, classFieldName, textFieldNames);
public CachingNaiveBayesClassifier(IndexReader indexReader, Analyzer analyzer, Query query, String classFieldName, String... textFieldNames) {
super(indexReader, analyzer, query, classFieldName, textFieldNames);
// building the cache
try {
reInitCache(0, true);