mirror of https://github.com/apache/lucene.git
LUCENE-3400: Removed DutchAnalyzer.setStemDictionary
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1161484 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a05f774573
commit
1057d24e7f
|
@ -31,6 +31,8 @@ API Changes
|
|||
* LUCENE-2514, LUCENE-2551: JDK and ICU CollationKeyAnalyzers were changed to
|
||||
use pure byte keys when Version >= 4.0. This cuts sort key size approximately
|
||||
in half. (Robert Muir)
|
||||
|
||||
* LUCENE-3400: Removed DutchAnalyzer.setStemDictionary (Chris Male)
|
||||
|
||||
New Features
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
|||
*/
|
||||
private Set<?> excltable = Collections.emptySet();
|
||||
|
||||
private Map<String, String> stemdict = new HashMap<String, String>();
|
||||
private final Map<String, String> stemdict = new HashMap<String, String>();
|
||||
private final Version matchVersion;
|
||||
|
||||
/**
|
||||
|
@ -130,23 +130,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
|||
excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
|
||||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a stem dictionary file that overrides the stemming algorithm.
|
||||
* This is a text file that contains, per line,
|
||||
* <tt>word<b>\t</b>stem</tt>, i.e. two tab-separated words
|
||||
*/
|
||||
public void setStemDictionary(File stemdictFile) {
|
||||
try {
|
||||
stemdict = WordlistLoader.getStemDict(stemdictFile);
|
||||
setPreviousTokenStream(null); // force a new stemmer to be created
|
||||
} catch (IOException e) {
|
||||
// TODO: throw IOException
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Returns a (possibly reused) {@link TokenStream} which tokenizes all the
|
||||
* text in the provided {@link Reader}.
|
||||
|
|
|
@ -150,18 +150,6 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that changes to the dictionary stemming table are applied immediately
|
||||
* when using reusable token streams.
|
||||
*/
|
||||
public void testStemDictionaryReuse() throws Exception {
|
||||
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
|
||||
checkOneTermReuse(a, "lichamelijk", "licham");
|
||||
File customDictFile = getDataFile("customStemDict.txt");
|
||||
a.setStemDictionary(customDictFile);
|
||||
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
|
||||
}
|
||||
|
||||
/**
|
||||
* Prior to 3.1, this analyzer had no lowercase filter.
|
||||
* stopwords were case sensitive. Preserve this for back compat.
|
||||
|
|
Loading…
Reference in New Issue