- 'De-finalized' the class, per Doug's suggestion, to make it easy to use
  different lists of stop words.
- Added a few more words to the stop word list (MS' contribution via Alan).
- Re-indented the whole class.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149696 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2002-02-21 22:01:07 +00:00
parent ef902b7ecd
commit 5c75b8623f
1 changed file with 41 additions and 28 deletions

View File

@@ -60,36 +60,49 @@ import java.util.Hashtable;
/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link /** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}. */ * LowerCaseFilter} and {@link StopFilter}. */
public final class StandardAnalyzer extends Analyzer { public class StandardAnalyzer extends Analyzer {
private Hashtable stopTable; private Hashtable stopTable;
/** An array containing some common English words that are not usually useful /** An array containing some common English words that are usually not
for searching. */ useful for searching. */
public static final String[] STOP_WORDS = { public static final String[] STOP_WORDS = {
"a", "and", "are", "as", "at", "be", "but", "by", "0","1","2","3","4","5","6","7","8","9",
"for", "if", "in", "into", "is", "it", "$",
"no", "not", "of", "on", "or", "s", "such", "about", "after", "all", "also", "an", "and",
"t", "that", "the", "their", "then", "there", "these", "another", "any", "are", "as", "at", "be", "because",
"they", "this", "to", "was", "will", "with" "been", "before", "being", "between", "both", "but",
}; "by","came","can","come","could","did","do","does",
"each","else","for","from","get","got","has","had",
"he","have","her","here","him","himself","his","how",
"if","in","into","is","it","its","just","like","make",
"many","me","might","more","most","much","must","my",
"never","now","of","on","only","or","other","our","out",
"over","re","said","same","see","should","since","so",
"some","still","such","take","than","that","the","their",
"them","then","there","these","they","this","those","through",
"to","too","under","up","use","very","want","was","way","we",
"well","were","what","when","where","which","while","who","will",
"with","would","you","your", "a","b","c","d","e","f","g","h","i",
"j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"
};
/** Builds an analyzer. */ /** Builds an analyzer. */
public StandardAnalyzer() { public StandardAnalyzer() {
this(STOP_WORDS); this(STOP_WORDS);
} }
/** Builds an analyzer with the given stop words. */ /** Builds an analyzer with the given stop words. */
public StandardAnalyzer(String[] stopWords) { public StandardAnalyzer(String[] stopWords) {
stopTable = StopFilter.makeStopTable(stopWords); stopTable = StopFilter.makeStopTable(stopWords);
} }
/** Constructs a {@link StandardTokenizer} filtered by a {@link /** Constructs a {@link StandardTokenizer} filtered by a {@link
* StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ * StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
public final TokenStream tokenStream(String fieldName, Reader reader) { public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader); TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result); result = new StandardFilter(result);
result = new LowerCaseFilter(result); result = new LowerCaseFilter(result);
result = new StopFilter(result, stopTable); result = new StopFilter(result, stopTable);
return result; return result;
} }
} }