LUCENE-2165: SnowballAnalyzer was missing Set-based ctor

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@891209 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2009-12-16 12:13:36 +00:00
parent 383fc2d635
commit f616a47036
3 changed files with 22 additions and 1 deletions

View File

@ -33,6 +33,9 @@ API Changes
* LUCENE-2108: Add SpellChecker.close, to close the underlying
reader. (Eirik Bjørsnøs via Mike McCandless)
* LUCENE-2165: Add a constructor to SnowballAnalyzer that takes a Set of
stopwords, and deprecate the String[] one. (Nick Burch via Robert Muir)
New features
* LUCENE-2102: Add a Turkish LowerCase Filter. TurkishLowerCaseFilter handles

View File

@ -51,12 +51,22 @@ public final class SnowballAnalyzer extends Analyzer {
this.matchVersion = matchVersion;
}
/**
 * Builds the named analyzer with the given stop words.
 *
 * @param matchVersion version forwarded to the two-argument constructor and to
 *        {@code StopFilter.makeStopSet}
 * @param name name of the analyzer to build, forwarded to the two-argument constructor
 * @param stopWords words from which the stop set is built
 * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
 */
@Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
  this(matchVersion, name);
  stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
this(matchVersion, name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
stopWords));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
and a {@link SnowballFilter} */

View File

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@ -37,6 +38,13 @@ public class TestSnowball extends BaseTokenStreamTestCase {
new String[]{"he", "abhor", "accent"});
}
/**
 * Checks the Set-based constructor: with the standard stop word set, "the" is
 * expected to be removed and "jumped" to come out as "jump".
 */
public void testStopwords() throws Exception {
  final String input = "the quick brown fox jumped";
  final String[] expectedTokens = {"quick", "brown", "fox", "jump"};
  Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
      StandardAnalyzer.STOP_WORDS_SET);
  assertAnalyzesTo(analyzer, input, expectedTokens);
}
/**
* Test english lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure
* we lowercase I correctly for non-Turkish languages in either case.