Add StopFilter makeStopSet methods that take a list, since we just call Arrays.asList() again anyway and we might already have a list

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@760058 13f79535-47bb-0310-9956-ffa450edef68
2009-03-30 17:26:55 +00:00 · 2009-03-30 17:26:55 +00:00 · c2b6731cc4
parent 7aa012ca27
commit c2b6731cc4
1 changed files with 26 additions and 1 deletions
--- a/src/java/org/apache/lucene/analysis/StopFilter.java
+++ b/src/java/org/apache/lucene/analysis/StopFilter.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Set;
+import java.util.List;

 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@ -112,8 +113,20 @@ public final class StopFilter extends TokenFilter {
  }

  /**
+   * Builds a Set from a List of stop words,
+   * appropriate for passing into the StopFilter constructor.
+   * This permits this stopWords construction to be cached once when
+   * an Analyzer is constructed.
   *
-   * @param stopWords
+   * @see #makeStopSet(java.util.List, boolean) passing false to ignoreCase
+   */
+  public static final Set makeStopSet(List/*<String>*/ stopWords) {
+    return makeStopSet(stopWords, false);
+  }
+    
+  /**
+   * 
+   * @param stopWords An array of stopwords
   * @param ignoreCase If true, all words are lower cased first.  
   * @return a Set containing the words
   */    
@ -123,6 +136,18 @@ public final class StopFilter extends TokenFilter {
    return stopSet;
  }

+  /**
+   * Builds a CharArraySet sized to the given List and adds every stop word to it.
+   * @param stopWords A List of Strings representing the stopwords
+   * @param ignoreCase if true, all words are lower cased first
+   * @return A Set containing the words
+   */
+  public static final Set makeStopSet(List/*<String>*/ stopWords, boolean ignoreCase){
+    CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+    stopSet.addAll(stopWords);
+    return stopSet;
+  }
+  
  /**
   * Returns the next input Token whose term() is not a stop word.
   */