mirror of https://github.com/apache/lucene.git
LUCENE-2560: stress tests for icu integration
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096339 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
593d7a54ea
commit
44ba0859db
|
@ -29,15 +29,14 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
|||
* Tests ICUFoldingFilter
|
||||
*/
|
||||
public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new ICUFoldingFilter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
||||
}
|
||||
};
|
||||
public void testDefaults() throws IOException {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new ICUFoldingFilter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
||||
}
|
||||
};
|
||||
|
||||
// case folding
|
||||
assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
|
||||
|
||||
|
@ -76,4 +75,9 @@ public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
|
|||
// handling of decomposed combining-dot-above
|
||||
assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,16 +31,15 @@ import com.ibm.icu.text.Normalizer2;
|
|||
* Tests the ICUNormalizer2Filter
|
||||
*/
|
||||
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new ICUNormalizer2Filter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
||||
}
|
||||
};
|
||||
|
||||
public void testDefaults() throws IOException {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new ICUNormalizer2Filter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
||||
}
|
||||
};
|
||||
|
||||
// case folding
|
||||
assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
|
||||
|
||||
|
@ -75,4 +74,9 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
|
|||
// decompose EAcute into E + combining Acute
|
||||
assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,10 +18,15 @@ package org.apache.lucene.analysis.icu;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
@ -83,4 +88,17 @@ public class TestICUTransformFilter extends BaseTokenStreamTestCase {
|
|||
TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform);
|
||||
assertTokenStreamContents(ts, new String[] { expected });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
final Transliterator transform = Transliterator.getInstance("Any-Latin");
|
||||
Analyzer a = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
|
||||
}
|
||||
};
|
||||
checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -232,4 +232,9 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue