LUCENE-5699 - fixed javadoc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619699 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tommaso Teofili 2014-08-22 08:02:15 +00:00
parent c107043a52
commit 601c09bcd4
1 changed files with 36 additions and 0 deletions

View File

@ -47,11 +47,34 @@ import org.apache.lucene.util.BytesRef;
*/ */
public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> { public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
/**
* {@link org.apache.lucene.index.AtomicReader} used to access the index
*/
protected AtomicReader atomicReader; protected AtomicReader atomicReader;
/**
* names of the fields to be used as input text
*/
protected String[] textFieldNames; protected String[] textFieldNames;
/**
* name of the field to be used as a class / category output
*/
protected String classFieldName; protected String classFieldName;
/**
* {@link org.apache.lucene.analysis.Analyzer} to be used for tokenizing unseen input text
*/
protected Analyzer analyzer; protected Analyzer analyzer;
/**
* {@link org.apache.lucene.search.IndexSearcher} to run searches on the index for retrieving frequencies
*/
protected IndexSearcher indexSearcher; protected IndexSearcher indexSearcher;
/**
* {@link org.apache.lucene.search.Query} used to eventually filter the document set to be used to classify
*/
protected Query query; protected Query query;
/** /**
@ -172,6 +195,12 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
return returnList; return returnList;
} }
/**
* count the number of documents in the index having at least one value for the 'class' field
*
* @return the no. of documents having a value for the 'class' field
* @throws IOException if an I/O error occurs while reading the index
*/
protected int countDocsWithClass() throws IOException { protected int countDocsWithClass() throws IOException {
int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount(); int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
if (docCount == -1) { // in case codec doesn't support getDocCount if (docCount == -1) { // in case codec doesn't support getDocCount
@ -188,6 +217,13 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
return docCount; return docCount;
} }
/**
* tokenize a <code>String</code> using this classifier's text fields and analyzer
*
* @param doc the <code>String</code> representing an input text (to be classified)
* @return a <code>String</code> array of the resulting tokens
* @throws IOException if an I/O error occurs during tokenization
*/
protected String[] tokenizeDoc(String doc) throws IOException { protected String[] tokenizeDoc(String doc) throws IOException {
Collection<String> result = new LinkedList<>(); Collection<String> result = new LinkedList<>();
for (String textFieldName : textFieldNames) { for (String textFieldName : textFieldNames) {