mirror of https://github.com/apache/lucene.git
LUCENE-2560: add basic stress tests for analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2714ba90ca
commit
68061ef921
|
@ -19,11 +19,15 @@ package org.apache.lucene.analysis;
|
|||
|
||||
import java.io.StringReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Base class for all Lucene unit tests that use TokenStreams.
|
||||
|
@ -229,4 +233,39 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
|||
assertAnalyzesToReuse(a, input, new String[]{expected});
|
||||
}
|
||||
|
||||
// simple utility method for blasting tokenstreams with data to make sure they don't do anything crazy
|
||||
|
||||
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
|
||||
checkRandomData(random, a, iterations, 20);
|
||||
}
|
||||
|
||||
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
String text;
|
||||
switch(_TestUtil.nextInt(random, 0, 3)) {
|
||||
case 0:
|
||||
text = _TestUtil.randomSimpleString(random);
|
||||
break;
|
||||
case 1:
|
||||
text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
|
||||
break;
|
||||
default:
|
||||
text = _TestUtil.randomUnicodeString(random, maxWordLength);
|
||||
}
|
||||
|
||||
TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
|
||||
assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
|
||||
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
|
||||
List<String> tokens = new ArrayList<String>();
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
tokens.add(termAtt.toString());
|
||||
// TODO: we could collect offsets etc here for better checking that reset() really works.
|
||||
}
|
||||
ts.close();
|
||||
// verify reusing is "reproducable" and also get the normal tokenstream sanity checks
|
||||
if (!tokens.isEmpty())
|
||||
assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -111,4 +111,8 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new MockAnalyzer(random), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
|
||||
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
|
|||
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
|
||||
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,4 +157,8 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, input, expected);
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
|
@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "llengües", "llengües");
|
||||
checkOneTermReuse(a, "llengua", "llengu");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -270,4 +270,9 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
|
|||
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
|
||||
newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
|||
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
|
||||
"\ud801\udc1ctest" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
||||
final class PayloadSetter extends TokenFilter {
|
||||
|
|
|
@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
|
|||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertEquals(0, offsetAtt.startOffset());
|
||||
assertEquals(4, offsetAtt.endOffset());
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
|
@ -219,4 +220,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
|||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
|
|||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
|
|||
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
|
||||
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
|
||||
checkOneTermReuse(a, "undersøg", "undersøg");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
|
||||
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -45,4 +46,9 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
import org.junit.Ignore;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
||||
|
@ -36,20 +37,25 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
|||
*
|
||||
*/
|
||||
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer t = new KeywordTokenizer(reader);
|
||||
return new TokenStreamComponents(t,
|
||||
new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
|
||||
}
|
||||
};
|
||||
|
||||
public void testStemming() throws Exception {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer t = new KeywordTokenizer(reader);
|
||||
return new TokenStreamComponents(t,
|
||||
new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
|
||||
}
|
||||
};
|
||||
|
||||
InputStream vocOut = getClass().getResourceAsStream("data.txt");
|
||||
assertVocabulary(analyzer, vocOut);
|
||||
vocOut.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
@Ignore("bugs!")
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
|
||||
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
|
||||
}
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "books", "books");
|
||||
checkOneTermReuse(a, "book", "book");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(analyzer, "congress", "congress");
|
||||
checkOneTerm(analyzer, "serious", "serious");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,20 +37,20 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
|||
* Test the PorterStemFilter with Martin Porter's test data.
|
||||
*/
|
||||
public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
||||
Analyzer a = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer t = new KeywordTokenizer(reader);
|
||||
return new TokenStreamComponents(t, new PorterStemFilter(t));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Run the stemmer against all strings in voc.txt
|
||||
* The output should be the same as the string in output.txt
|
||||
*/
|
||||
public void testPorterStemFilter() throws Exception {
|
||||
Analyzer a = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer t = new KeywordTokenizer(reader);
|
||||
return new TokenStreamComponents(t, new PorterStemFilter(t));
|
||||
}
|
||||
};
|
||||
|
||||
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
|
||||
}
|
||||
|
||||
|
@ -61,4 +61,9 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
|||
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
|
||||
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "chicana", "chican");
|
||||
checkOneTermReuse(a, "chicano", "chicano");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "zaldiak", "zaldiak");
|
||||
checkOneTermReuse(a, "mendiari", "mendi");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
|
||||
"brown", "fox" });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
|
||||
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
|
||||
assertAnalyzesTo(a, "Votre", new String[] { });
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "correspondente", "correspondente");
|
||||
checkOneTermReuse(a, "corresponderá", "correspond");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
|
|||
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTermReuse(a, "हिंदी", "हिंदी");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "babakocsi", "babakocsi");
|
||||
checkOneTermReuse(a, "babakocsijáért", "babakocs");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
|
||||
checkOneTermReuse(a, "արծիվ", "արծ");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "peledakan", "peledakan");
|
||||
checkOneTermReuse(a, "pembunuhan", "bunuh");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "abbandonata", "abbandonata");
|
||||
checkOneTermReuse(a, "abbandonati", "abbandon");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,9 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "tirgiem", "tirgiem");
|
||||
checkOneTermReuse(a, "tirgus", "tirg");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new LatvianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -185,4 +185,9 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
|
||||
}
|
|
@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
|
||||
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "quilométricas", "quilométricas");
|
||||
checkOneTermReuse(a, "quilométricos", "quilométr");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,4 +66,9 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "absenţa", "absenţa");
|
||||
checkOneTermReuse(a, "absenţi", "absenţ");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
|
|||
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
|
||||
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,9 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||
|
||||
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
|
@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
|
||||
checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testVocabulary() throws IOException {
|
||||
assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -142,5 +142,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
analyzer,
|
||||
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
|
||||
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
|
||||
}
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "ağacı", "ağacı");
|
||||
checkOneTermReuse(a, "ağaç", "ağaç");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -195,4 +195,9 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
|
|||
while (stream.incrementToken()) {
|
||||
}
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTermReuse(a, "studenta", "studenta");
|
||||
checkOneTermReuse(a, "studenci", "student");
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new PolishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue