mirror of https://github.com/apache/lucene.git
LUCENE-5699 - fixed javadoc
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619699 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c107043a52
commit
601c09bcd4
|
@ -47,11 +47,34 @@ import org.apache.lucene.util.BytesRef;
|
||||||
*/
|
*/
|
||||||
public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link AtomicReader} used to read terms and statistics from the index
|
||||||
|
*/
|
||||||
protected AtomicReader atomicReader;
|
protected AtomicReader atomicReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* names of the fields to be used as input text
|
||||||
|
*/
|
||||||
protected String[] textFieldNames;
|
protected String[] textFieldNames;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* name of the field to be used as a class / category output
|
||||||
|
*/
|
||||||
protected String classFieldName;
|
protected String classFieldName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link org.apache.lucene.analysis.Analyzer} to be used for tokenizing unseen input text
|
||||||
|
*/
|
||||||
protected Analyzer analyzer;
|
protected Analyzer analyzer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link org.apache.lucene.search.IndexSearcher} to run searches on the index for retrieving frequencies
|
||||||
|
*/
|
||||||
protected IndexSearcher indexSearcher;
|
protected IndexSearcher indexSearcher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link org.apache.lucene.search.Query} optionally used to filter the set of documents used for classification
|
||||||
|
*/
|
||||||
protected Query query;
|
protected Query query;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -172,6 +195,12 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
||||||
return returnList;
|
return returnList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* count the number of documents in the index having at least one value for the 'class' field
|
||||||
|
*
|
||||||
|
* @return the no. of documents having a value for the 'class' field
|
||||||
|
* @throws IOException if accessing the index fails
|
||||||
|
*/
|
||||||
protected int countDocsWithClass() throws IOException {
|
protected int countDocsWithClass() throws IOException {
|
||||||
int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
|
||||||
if (docCount == -1) { // in case codec doesn't support getDocCount
|
if (docCount == -1) { // in case codec doesn't support getDocCount
|
||||||
|
@ -188,6 +217,13 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
||||||
return docCount;
|
return docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tokenize a <code>String</code> on this classifier's text fields and analyzer
|
||||||
|
*
|
||||||
|
* @param doc the <code>String</code> representing an input text (to be classified)
|
||||||
|
* @return a <code>String</code> array of the tokens produced from the input text
|
||||||
|
* @throws IOException if tokenization fails
|
||||||
|
*/
|
||||||
protected String[] tokenizeDoc(String doc) throws IOException {
|
protected String[] tokenizeDoc(String doc) throws IOException {
|
||||||
Collection<String> result = new LinkedList<>();
|
Collection<String> result = new LinkedList<>();
|
||||||
for (String textFieldName : textFieldNames) {
|
for (String textFieldName : textFieldNames) {
|
||||||
|
|
Loading…
Reference in New Issue