mirror of https://github.com/apache/lucene.git
LUCENE-2165: SnowballAnalyzer was missing Set-based ctor
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@891209 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
383fc2d635
commit
f616a47036
|
@ -33,6 +33,9 @@ API Changes
|
|||
* LUCENE-2108: Add SpellChecker.close, to close the underlying
|
||||
reader. (Eirik Bjørsnøs via Mike McCandless)
|
||||
|
||||
* LUCENE-2165: Add a constructor to SnowballAnalyzer that takes a Set of
|
||||
stopwords, and deprecate the String[] one. (Nick Burch via Robert Muir)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-2102: Add a Turkish LowerCase Filter. TurkishLowerCaseFilter handles
|
||||
|
|
|
@ -51,11 +51,21 @@ public final class SnowballAnalyzer extends Analyzer {
|
|||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
/**
 * Builds the named analyzer with the given stop words.
 *
 * @param matchVersion Lucene version compatibility setting, forwarded to the delegate constructor
 * @param name name of the Snowball stemmer (e.g. {@code "English"})
 * @param stopWords words to exclude from the token stream
 * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
 */
@Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
  this(matchVersion, name);
  // Convert the legacy String[] form into the Set-based representation used internally.
  stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}
|
||||
|
||||
/** Builds the named analyzer with the given stop words. */
|
||||
public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
|
||||
this(matchVersion, name);
|
||||
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
|
||||
stopWords));
|
||||
}
|
||||
|
||||
/** Constructs a {@link StandardTokenizer} filtered by a {@link
|
||||
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
|
@ -36,6 +37,13 @@ public class TestSnowball extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesTo(a, "he abhorred accents",
|
||||
new String[]{"he", "abhor", "accent"});
|
||||
}
|
||||
|
||||
public void testStopwords() throws Exception {
|
||||
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
|
||||
StandardAnalyzer.STOP_WORDS_SET);
|
||||
assertAnalyzesTo(a, "the quick brown fox jumped",
|
||||
new String[]{"quick", "brown", "fox", "jump"});
|
||||
}
|
||||
|
||||
/**
|
||||
* Test english lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure
|
||||
|
|
Loading…
Reference in New Issue