mirror of https://github.com/apache/lucene.git
LUCENE-3530: Remove deprecated methods in CompoundTokenFilters
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1188613 13f79535-47bb-0310-9956-ffa450edef68
commit a91efbedd1
parent ec186e7280
org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -48,7 +48,7 @@ import org.apache.lucene.util.Version;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optional performance (as this filter does lots of lookups to the dictionary,
  * you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public abstract class CompoundWordTokenFilterBase extends TokenFilter {
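The javadoc change above narrows the case-insensitivity note to Set arguments now that the String[] constructors are gone. The behavior it describes is implemented by the constructor visible in the next hunk: whatever dictionary the caller supplies is copied into a case-insensitive CharArraySet. A minimal sketch of the observable effect (the class and method names here are illustrative, not part of the commit):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

class CaseInsensitiveCopySketch {
  static void demo() {
    Set<String> words = new HashSet<String>(Arrays.asList("Schiff"));
    // Mirrors what CompoundWordTokenFilterBase does with a supplied Set:
    // the copy ignores case, whatever the casing of the original entries.
    CharArraySet dict = new CharArraySet(Version.LUCENE_40, words, true);
    System.out.println(dict.contains("schiff")); // true
    System.out.println(dict.contains("SCHIFF")); // true
  }
}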
@@ -103,33 +103,6 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
       this.dictionary = new CharArraySet(matchVersion, dictionary, true);
     }
   }
-
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary) {
-    this(matchVersion, input, makeDictionary(matchVersion, dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
-  }
-
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    this(matchVersion, input, makeDictionary(matchVersion, dictionary), minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-  }
-
-  /** @deprecated Use the constructors taking {@link Set} */
-  @Deprecated
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, boolean onlyLongestMatch) {
-    this(matchVersion, input, makeDictionary(matchVersion, dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
-  }
-
-  /** @deprecated Only available for backwards compatibility. */
-  @Deprecated
-  public static CharArraySet makeDictionary(final Version matchVersion, final String[] dictionary) {
-    if (dictionary == null) {
-      return null;
-    }
-    return new CharArraySet(matchVersion, Arrays.asList(dictionary), true);
-  }
 
   @Override
   public final boolean incrementToken() throws IOException {
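Callers that relied on the removed String[] constructors or the deleted makeDictionary helper now build the CharArraySet themselves before calling a Set-taking constructor. The one-liner below reproduces what the deleted helper did; the wrapper class and method name are hypothetical:

import java.util.Arrays;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

class DictionaryMigration {
  // Equivalent of the removed makeDictionary(Version, String[]):
  // a case-insensitive CharArraySet over the given words.
  static CharArraySet toDictionary(Version matchVersion, String... words) {
    return new CharArraySet(matchVersion, Arrays.asList(words), true);
  }
}

Unlike the removed helper, this sketch does not special-case a null array; callers that previously passed null got null back and should now simply not pass a dictionary at all.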
@@ -190,5 +163,5 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
       this.endOffset = newStart + length;
     }
 
   }
-}
+}
org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -43,56 +43,10 @@ import org.apache.lucene.util.Version;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optional performance (as this filter does lots of lookups to the dictionary,
  * you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
-  /**
-   * Creates a new {@link DictionaryCompoundWordTokenFilter}.
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param dictionary
-   *          the word dictionary to match against
-   * @param minWordSize
-   *          only words longer than this get processed
-   * @param minSubwordSize
-   *          only subwords longer than this get to the output stream
-   * @param maxSubwordSize
-   *          only subwords shorter than this get to the output stream
-   * @param onlyLongestMatch
-   *          Add only the longest matching subword to the stream
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary,
-      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-  }
-
-  /**
-   * Creates a new {@link DictionaryCompoundWordTokenFilter}
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   *
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param dictionary
-   *          the word dictionary to match against
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary) {
-    super(matchVersion, input, dictionary);
-  }
 
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}
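With the deprecated constructors gone, a DictionaryCompoundWordTokenFilter is built from a CharArraySet directly. A sketch under the assumption that the surviving three-argument constructor takes the dictionary as a Set (the class is referenced from its own package to avoid guessing at anything else; dictionary entries and the LUCENE_40 match version are illustrative):

package org.apache.lucene.analysis.compound;

import java.io.StringReader;
import java.util.Arrays;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

class DictionaryCompoundUsageSketch {
  static TokenStream decompounding(String text) {
    // Build the dictionary once, up front, as a case-insensitive set.
    CharArraySet dict = new CharArraySet(Version.LUCENE_40,
        Arrays.asList("fuß", "ball"), true);
    return new DictionaryCompoundWordTokenFilter(Version.LUCENE_40,
        new WhitespaceTokenizer(Version.LUCENE_40, new StringReader(text)),
        dict);
  }
}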
org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -46,69 +46,12 @@ import org.xml.sax.InputSource;
  * have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
  * For optional performance (as this filter does lots of lookups to the dictionary,
  * you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the ctors, they will be automatically
  * transformed to case-insensitive!
  */
 public class HyphenationCompoundWordTokenFilter extends
     CompoundWordTokenFilterBase {
   private HyphenationTree hyphenator;
-
-  /**
-   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param hyphenator
-   *          the hyphenation pattern tree to use for hyphenation
-   * @param dictionary
-   *          the word dictionary to match against
-   * @param minWordSize
-   *          only words longer than this get processed
-   * @param minSubwordSize
-   *          only subwords longer than this get to the output stream
-   * @param maxSubwordSize
-   *          only subwords shorter than this get to the output stream
-   * @param onlyLongestMatch
-   *          Add only the longest matching subword to the stream
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-      HyphenationTree hyphenator, String[] dictionary, int minWordSize,
-      int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
-        onlyLongestMatch);
-
-    this.hyphenator = hyphenator;
-  }
-
-  /**
-   * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
-   *
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See <a
-   *          href="CompoundWordTokenFilterBase#version"
-   *          >CompoundWordTokenFilterBase</a> for details.
-   * @param input
-   *          the {@link TokenStream} to process
-   * @param hyphenator
-   *          the hyphenation pattern tree to use for hyphenation
-   * @param dictionary
-   *          the word dictionary to match against
-   * @deprecated Use the constructors taking {@link Set}
-   */
-  @Deprecated
-  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-      HyphenationTree hyphenator, String[] dictionary) {
-    this(matchVersion, input, hyphenator, makeDictionary(matchVersion, dictionary), DEFAULT_MIN_WORD_SIZE,
-        DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
-  }
 
   /**
    * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
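The hyphenation-based filter migrates the same way, except that a HyphenationTree is still loaded first; getHyphenationTree is the existing static loader the tests below also use. The constructor shape is assumed analogous to the dictionary filter, and the pattern file and dictionary words are illustrative:

package org.apache.lucene.analysis.compound;

import java.io.StringReader;
import java.util.Arrays;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;

class HyphenationCompoundUsageSketch {
  static TokenStream decompounding(String text, InputSource patterns) throws Exception {
    // e.g. the Danish pattern file da_UTF8.xml used by the tests below.
    HyphenationTree hyphenator =
        HyphenationCompoundWordTokenFilter.getHyphenationTree(patterns);
    CharArraySet dict = new CharArraySet(Version.LUCENE_40,
        Arrays.asList("læse", "hest"), true);
    return new HyphenationCompoundWordTokenFilter(Version.LUCENE_40,
        new WhitespaceTokenizer(Version.LUCENE_40, new StringReader(text)),
        hyphenator, dict);
  }
}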
org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.compound;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Arrays;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -27,14 +28,20 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.xml.sax.InputSource;
 
 public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
+
+  private static CharArraySet makeDictionary(String... dictionary) {
+    return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true);
+  }
+
   public void testHyphenationCompoundWordsDA() throws Exception {
-    String[] dict = { "læse", "hest" };
+    CharArraySet dict = makeDictionary("læse", "hest");
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
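The new private helper uses varargs, which keeps the bulk edits in the remaining hunks mechanical: each literal list simply becomes an argument list, and because Java accepts an array where varargs are expected, a leftover String[] can be passed through unchanged. A hypothetical standalone equivalent (with an explicit Version parameter, since TEST_VERSION_CURRENT only exists inside the test hierarchy):

import java.util.Arrays;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

class VarargsDictionarySketch {
  static CharArraySet makeDictionary(Version v, String... words) {
    return new CharArraySet(v, Arrays.asList(words), true);
  }

  static void demo() {
    // Literal arguments...
    CharArraySet a = makeDictionary(Version.LUCENE_40, "læse", "hest");
    // ...or an existing array, accepted by the same varargs parameter.
    String[] legacy = { "basket", "ball" };
    CharArraySet b = makeDictionary(Version.LUCENE_40, legacy);
    System.out.println(a.contains("HEST") && b.contains("Basket")); // true
  }
}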
@@ -53,7 +60,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
-    String[] dict = { "basketball", "basket", "ball", "kurv" };
+    CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
@@ -121,9 +128,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testDumbCompoundWordsSE() throws Exception {
-    String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+    CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
-        "Sko", "Vind", "Rute", "Torkare", "Blad" };
+        "Sko", "Vind", "Rute", "Torkare", "Blad");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new MockTokenizer(
@@ -151,9 +158,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testDumbCompoundWordsSELongestMatch() throws Exception {
-    String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+    CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
-        "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
+        "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
@@ -168,7 +175,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
-    String[] dict = {"ab", "cd", "ef"};
+    CharArraySet dict = makeDictionary("ab", "cd", "ef");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -189,7 +196,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testWordComponentWithLessThanMinimumLength() throws Exception {
-    String[] dict = {"abc", "d", "efg"};
+    CharArraySet dict = makeDictionary("abc", "d", "efg");
 
     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
         new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -211,8 +218,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testReset() throws Exception {
-    String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
-        "Aufgabe", "Überwachung" };
+    CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
+        "Aufgabe", "Überwachung");
 
     Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "Rindfleischüberwachungsgesetz"));
@@ -234,7 +241,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   }
 
   public void testRetainMockAttribute() throws Exception {
-    String[] dict = { "abc", "d", "efg" };
+    CharArraySet dict = makeDictionary("abc", "d", "efg");
     Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
         new StringReader("abcdefg"));
     TokenStream stream = new MockRetainAttributeFilter(tokenizer);
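Taken together, the test changes leave every assertion untouched; only the dictionary construction moves to CharArraySet. A hypothetical test in the post-commit style, assuming the default minimum word/subword sizes (5 and 2) and the existing assertTokenStreamContents helper from BaseTokenStreamTestCase:

package org.apache.lucene.analysis.compound;

import java.io.StringReader;
import java.util.Arrays;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;

public class TestCompoundMigrationSketch extends BaseTokenStreamTestCase {
  public void testSetBasedDictionary() throws Exception {
    // No String[] constructor is involved anywhere; the dictionary is a
    // case-insensitive CharArraySet from the start.
    CharArraySet dict = new CharArraySet(TEST_VERSION_CURRENT,
        Arrays.asList("fuß", "ball"), true);
    TokenStream ts = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("fußball")),
        dict);
    // The original token is kept; decompounded parts follow at the same position.
    assertTokenStreamContents(ts, new String[] { "fußball", "fuß", "ball" });
  }
}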