LUCENE-3530: Remove deprecated methods in CompoundTokenFilters

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1188613 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2011-10-25 11:31:16 +00:00
parent ec186e7280
commit a91efbedd1
4 changed files with 22 additions and 145 deletions

org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java

@@ -48,7 +48,7 @@ import org.apache.lucene.util.Version;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optimal performance (as this filter does lots of lookups to the dictionary),
  * you should use the latter analysis chain/CharArraySet. Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public abstract class CompoundWordTokenFilterBase extends TokenFilter {
@@ -104,33 +104,6 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
     }
   }
 
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary) {
-    this(matchVersion, input,makeDictionary(matchVersion,dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
-  }
-
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    this(matchVersion, input,makeDictionary(matchVersion,dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
-  }
-
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, boolean onlyLongestMatch) {
-    this(matchVersion, input,makeDictionary(matchVersion,dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
-  }
-
-  /** @deprecated Only available for backwards compatibility. */
-  @Deprecated
-  public static CharArraySet makeDictionary(final Version matchVersion, final String[] dictionary) {
-    if (dictionary == null) {
-      return null;
-    }
-    return new CharArraySet(matchVersion, Arrays.asList(dictionary), true);
-  }
-
   @Override
   public final boolean incrementToken() throws IOException {
     if (!tokens.isEmpty()) {

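Migration note (not part of this commit): with the String[] overloads and the makeDictionary helper removed, callers now build the dictionary set themselves. Below is a minimal Java sketch that does exactly what the removed helper's body did; the class name, helper name, and word list are illustrative, not from the commit:

    import java.util.Arrays;

    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class DictionarySetup {
      // Equivalent of the removed makeDictionary(Version, String[]): the trailing
      // 'true' makes the set case-insensitive, matching the javadoc note that
      // supplied Sets are "automatically transformed to case-insensitive".
      public static CharArraySet buildDictionary(Version matchVersion, String... words) {
        return new CharArraySet(matchVersion, Arrays.asList(words), true);
      }
    }

The old helper's null pass-through (returning null for a null String[]) is intentionally omitted here for brevity.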
org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java

@@ -43,56 +43,10 @@ import org.apache.lucene.util.Version;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optimal performance (as this filter does lots of lookups to the dictionary),
  * you should use the latter analysis chain/CharArraySet. Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
-  /**
-   * Creates a new {@link DictionaryCompoundWordTokenFilter}.
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param dictionary
-   *          the word dictionary to match against
-   * @param minWordSize
-   *          only words longer than this get processed
-   * @param minSubwordSize
-   *          only subwords longer than this get to the output stream
-   * @param maxSubwordSize
-   *          only subwords shorter than this get to the output stream
-   * @param onlyLongestMatch
-   *          Add only the longest matching subword to the stream
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary,
-      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-  }
-
-  /**
-   * Creates a new {@link DictionaryCompoundWordTokenFilter}
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   *
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param dictionary
-   *          the word dictionary to match against
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary) {
-    super(matchVersion, input, dictionary);
-  }
-
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}

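Call-site migration sketch for DictionaryCompoundWordTokenFilter (not part of this commit). It targets the Set-taking constructor that the deprecation notes point at; the Version constant, tokenizer, and sample words are assumptions for illustration:

    import java.io.StringReader;
    import java.util.Arrays;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class DictionaryCompoundMigration {
      public static TokenStream decompounder(Version matchVersion) {
        // Before this commit:
        //   new DictionaryCompoundWordTokenFilter(matchVersion, tokenizer,
        //       new String[] { "Bil", "Dörr" });
        // After: build the case-insensitive Set up front and pass it in.
        CharArraySet dict = new CharArraySet(matchVersion,
            Arrays.asList("Bil", "Dörr"), true);
        return new DictionaryCompoundWordTokenFilter(matchVersion,
            new WhitespaceTokenizer(matchVersion, new StringReader("Bildörr")),
            dict);
      }
    }

Passing a CharArraySet directly (rather than an arbitrary Set) avoids the automatic copy-and-convert step the javadoc warns about.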
org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java

@@ -46,70 +46,13 @@ import org.xml.sax.InputSource;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optimal performance (as this filter does lots of lookups to the dictionary),
  * you should use the latter analysis chain/CharArraySet. Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public class HyphenationCompoundWordTokenFilter extends
     CompoundWordTokenFilterBase {
   private HyphenationTree hyphenator;
-  /**
-   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param hyphenator
-   *          the hyphenation pattern tree to use for hyphenation
-   * @param dictionary
-   *          the word dictionary to match against
-   * @param minWordSize
-   *          only words longer than this get processed
-   * @param minSubwordSize
-   *          only subwords longer than this get to the output stream
-   * @param maxSubwordSize
-   *          only subwords shorter than this get to the output stream
-   * @param onlyLongestMatch
-   *          Add only the longest matching subword to the stream
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-      HyphenationTree hyphenator, String[] dictionary, int minWordSize,
-      int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
-        onlyLongestMatch);
-    this.hyphenator = hyphenator;
-  }
-
-  /**
-   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param hyphenator
-   *          the hyphenation pattern tree to use for hyphenation
-   * @param dictionary
-   *          the word dictionary to match against
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-      HyphenationTree hyphenator, String[] dictionary) {
-    this(matchVersion, input, hyphenator, makeDictionary(matchVersion,dictionary), DEFAULT_MIN_WORD_SIZE,
-        DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
-  }
-
   /**
    * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
    *

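Call-site migration sketch for HyphenationCompoundWordTokenFilter (not part of this commit). The static getHyphenationTree(InputSource) factory mirrors what the test below does with da_UTF8.xml; the pattern-file path, sample words, Version constant, and the exact Set-taking constructor arity are assumptions for illustration:

    import java.io.StringReader;
    import java.util.Arrays;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
    import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;
    import org.xml.sax.InputSource;

    public class HyphenationCompoundMigration {
      public static TokenStream decompounder(Version matchVersion) throws Exception {
        // Load a hyphenation pattern grammar, as the tests do (path is illustrative).
        InputSource is = new InputSource("da_UTF8.xml");
        HyphenationTree hyphenator =
            HyphenationCompoundWordTokenFilter.getHyphenationTree(is);

        // The String[] overloads are gone; supply the dictionary as a Set.
        CharArraySet dict = new CharArraySet(matchVersion,
            Arrays.asList("læse", "hest"), true);
        return new HyphenationCompoundWordTokenFilter(matchVersion,
            new WhitespaceTokenizer(matchVersion, new StringReader("læsehest")),
            hyphenator, dict);
      }
    }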
org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java

@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.compound;
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Arrays;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -27,14 +28,20 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.xml.sax.InputSource;
 
 public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
+
+  private static CharArraySet makeDictionary(String... dictionary) {
+    return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true);
+  }
+
   public void testHyphenationCompoundWordsDA() throws Exception {
-    String[] dict = { "læse", "hest" };
+    CharArraySet dict = makeDictionary("læse", "hest");
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
@@ -53,7 +60,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
-    String[] dict = { "basketball", "basket", "ball", "kurv" };
+    CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
@@ -121,9 +128,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testDumbCompoundWordsSE() throws Exception {
-    String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+    CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
-        "Sko", "Vind", "Rute", "Torkare", "Blad" };
+        "Sko", "Vind", "Rute", "Torkare", "Blad");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new MockTokenizer(
@@ -151,9 +158,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testDumbCompoundWordsSELongestMatch() throws Exception {
-    String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+    CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
-        "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
+        "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
@@ -168,7 +175,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
-    String[] dict = {"ab", "cd", "ef"};
+    CharArraySet dict = makeDictionary("ab", "cd", "ef");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -189,7 +196,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testWordComponentWithLessThanMinimumLength() throws Exception {
-    String[] dict = {"abc", "d", "efg"};
+    CharArraySet dict = makeDictionary("abc", "d", "efg");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -211,8 +218,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testReset() throws Exception {
-    String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
-        "Aufgabe", "Überwachung" };
+    CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
+        "Aufgabe", "Überwachung");
 
     Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "Rindfleischüberwachungsgesetz"));
@@ -234,7 +241,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testRetainMockAttribute() throws Exception {
-    String[] dict = { "abc", "d", "efg" };
+    CharArraySet dict = makeDictionary("abc", "d", "efg");
 
     Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
         new StringReader("abcdefg"));
     TokenStream stream = new MockRetainAttributeFilter(tokenizer);
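For completeness, a hedged sketch of draining a decompounder's output through the standard TokenStream contract; the tests above assert the same thing via BaseTokenStreamTestCase helpers instead of printing. The class name is illustrative:

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public final class PrintTokens {
      public static void print(TokenStream ts) throws IOException {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                   // required before the first incrementToken()
        while (ts.incrementToken()) { // the original token comes first, then its subwords
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }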