LUCENE-5548 - minor fixes (imports, comments, method names)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1638718 13f79535-47bb-0310-9956-ffa450edef68
2014-11-12 08:40:02 +00:00 · 2014-11-12 08:40:02 +00:00 · 88f2ebd5d0
parent a1f3cebe50
commit 88f2ebd5d0
5 changed files with 44 additions and 47 deletions
--- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
@ -16,6 +16,14 @@
 */
 package org.apache.lucene.classification;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
@ -29,14 +37,6 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
 /**
 * A k-Nearest Neighbor classifier (see <code>http://en.wikipedia.org/wiki/K-nearest_neighbors</code>) based
 * on {@link MoreLikeThis}
@ -82,14 +82,14 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
   */
  @Override
  public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
+    TopDocs topDocs = knnSearch(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
-    ClassificationResult<BytesRef> retval=null;
+    ClassificationResult<BytesRef> retval = null;
-    double maxscore=-Double.MAX_VALUE;
+    double maxscore = -Double.MAX_VALUE;
-    for(ClassificationResult<BytesRef> element:doclist){
+    for (ClassificationResult<BytesRef> element : doclist) {
-      if(element.getScore()>maxscore){
+      if (element.getScore() > maxscore) {
-        retval=element;
+        retval = element;
-        maxscore=element.getScore();
+        maxscore = element.getScore();
      }
    }
    return retval;
@ -100,8 +100,8 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
   */
  @Override
  public List<ClassificationResult<BytesRef>> getClasses(String text) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
+    TopDocs topDocs = knnSearch(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
    Collections.sort(doclist);
    return doclist;
  }
@ -111,13 +111,13 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
   */
  @Override
  public List<ClassificationResult<BytesRef>> getClasses(String text, int max) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
+    TopDocs topDocs = knnSearch(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
    Collections.sort(doclist);
    return doclist.subList(0, max);
  }
-  private TopDocs knnSearcher(String text) throws IOException{
+  private TopDocs knnSearch(String text) throws IOException {
    if (mlt == null) {
      throw new IOException("You must first call Classifier#train");
    }
@ -145,18 +145,17 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
      }
    }
    List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
-    int sumdoc=0;
+    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
      Integer count = entry.getValue();
      returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
-        sumdoc+=count;
+      sumdoc += count;
    }
    //correction
-    if(sumdoc<k){
+    if (sumdoc < k) {
-      for(ClassificationResult<BytesRef> cr:returnList){
+      for (ClassificationResult<BytesRef> cr : returnList) {
-        cr.setScore(cr.getScore()*(double)k/(double)sumdoc);
+        cr.setScore(cr.getScore() * (double) k / (double) sumdoc);
      }
    }
    return returnList;
--- a/lucene/classification/src/java/org/apache/lucene/classification/package.html
+++ b/lucene/classification/src/java/org/apache/lucene/classification/package.html
@ -17,7 +17,6 @@
 <html>
 <body>
 Uses already seen data (the indexed documents) to classify new documents.
-Currently only contains a (simplistic) Lucene based Naive Bayes classifier,
+Currently contains a (simplistic) Naive Bayes classifier, a k-Nearest Neighbor classifier and a Perceptron based classifier
-a k-Nearest Neighbor classifier and a Perceptron based classifier
 </body>
 </html>
--- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
@ -17,14 +17,16 @@ package org.apache.lucene.classification.utils;
 * limitations under the License.
 */
+import java.io.IOException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
@ -32,8 +34,6 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
-import java.io.IOException;
 /**
 * Utility class for creating training / test / cross validation indexes from the original index.
 */
--- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
@ -16,12 +16,12 @@
 */
 package org.apache.lucene.classification.utils;
+import java.io.IOException;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
 /**
 * utility class for converting Lucene {@link org.apache.lucene.document.Document}s to <code>Double</code> vectors.
 */
--- a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
+++ b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
@ -33,7 +33,6 @@ import java.io.Reader;
 /**
 * Testcase for {@link SimpleNaiveBayesClassifier}
 */
-// TODO : eventually remove this if / when fallback methods exist for all un-supportable codec methods (see LUCENE-4872)
 public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase<BytesRef> {
  @Test