LUCENE-2560: add basic stress tests for analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-04-23 16:55:15 +00:00
parent 2714ba90ca
commit 68061ef921
54 changed files with 331 additions and 23 deletions

View File

@ -19,11 +19,15 @@ package org.apache.lucene.analysis;
import java.io.StringReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Base class for all Lucene unit tests that use TokenStreams.
@ -229,4 +233,39 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
assertAnalyzesToReuse(a, input, new String[]{expected});
}
/**
 * Convenience overload of {@code checkRandomData(random, a, iterations, maxWordLength)}
 * that passes 20 as the maximum word length.
 */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
  final int defaultMaxWordLength = 20;
  checkRandomData(random, a, iterations, defaultMaxWordLength);
}
/**
 * Blasts an analyzer with randomly generated text to make sure it does not do anything crazy.
 * <p>
 * For each iteration a random string is generated, analyzed once through
 * {@code reusableTokenStream} while collecting the term text of every token, and then
 * (when at least one token was produced) analyzed again through
 * {@code assertAnalyzesToReuse}, which must yield the same terms.
 *
 * @param random source of randomness for choosing the generator and building the text
 * @param a analyzer under test
 * @param iterations number of random strings to analyze
 * @param maxWordLength length bound handed to the two unicode string generators
 *        (the simple-string generator is called without it)
 * @throws IOException if consuming the token stream fails
 */
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
  for (int i = 0; i < iterations; i++) {
    String text;
    // NOTE(review): if _TestUtil.nextInt treats the upper bound as inclusive, the values
    // 2 and 3 both land in the default branch -- confirm this weighting is intended.
    switch (_TestUtil.nextInt(random, 0, 3)) {
      case 0:
        text = _TestUtil.randomSimpleString(random);
        break;
      case 1:
        text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
        break;
      default:
        text = _TestUtil.randomUnicodeString(random, maxWordLength);
        break;
    }

    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    List<String> tokens = new ArrayList<String>();
    ts.reset();
    while (ts.incrementToken()) {
      tokens.add(termAtt.toString());
      // TODO: we could collect offsets etc here for better checking that reset() really works.
    }
    // Complete the consumer workflow (reset, incrementToken, end, close) so that
    // end-of-stream handling of the analysis chain is exercised on the first pass too.
    ts.end();
    ts.close();

    // verify reusing is "reproducible" and also get the normal tokenstream sanity checks
    if (!tokens.isEmpty()) {
      assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()]));
    }
  }
}
}

View File

@ -111,4 +111,8 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
}
/** Stress test: runs {@code checkRandomData} on a new MockAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final MockAnalyzer analyzerUnderTest = new MockAnalyzer(random);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
}
/** Stress test: runs {@code checkRandomData} on a new ArabicAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final ArabicAnalyzer analyzerUnderTest = new ArabicAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
}
/** Stress test: runs {@code checkRandomData} on a new BulgarianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final BulgarianAnalyzer analyzerUnderTest = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -157,4 +157,8 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, input, expected);
}
/** Stress test: runs {@code checkRandomData} on a new BrazilianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final BrazilianAnalyzer analyzerUnderTest = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "llengües", "llengües");
checkOneTermReuse(a, "llengua", "llengu");
}
/** Stress test: runs {@code checkRandomData} on a new CatalanAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final CatalanAnalyzer analyzerUnderTest = new CatalanAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -270,4 +270,9 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
}
/** Stress test: runs {@code checkRandomData} on a new CJKAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final CJKAnalyzer analyzerUnderTest = new CJKAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
/**
 * Stress test: runs {@code checkRandomData} in turn on a new WhitespaceAnalyzer,
 * SimpleAnalyzer and StopAnalyzer, each for 10000 * RANDOM_MULTIPLIER iterations.
 */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;

  final WhitespaceAnalyzer whitespace = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, whitespace, iterations);

  final SimpleAnalyzer simple = new SimpleAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, simple, iterations);

  final StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, stop, iterations);
}
}
final class PayloadSetter extends TokenFilter {

View File

@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
dir.close();
}
/** Stress test: runs {@code checkRandomData} on a new ClassicAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final ClassicAnalyzer analyzerUnderTest = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());
}
/** Stress test: runs {@code checkRandomData} on a new KeywordAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final KeywordAnalyzer analyzerUnderTest = new KeywordAnalyzer();
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@ -219,4 +220,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
new String[] { "", "", "", "", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
/** Stress test: runs {@code checkRandomData} on a new StandardAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final StandardAnalyzer analyzerUnderTest = new StandardAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
new String[] { "", "", "", "", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
/** Stress test: runs {@code checkRandomData} on this class's analyzer {@code a} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, a, iterations);
}
}

View File

@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
/** Stress test: runs {@code checkRandomData} on a new CzechAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final CzechAnalyzer analyzerUnderTest = new CzechAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");
}
/** Stress test: runs {@code checkRandomData} on a new DanishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final DanishAnalyzer analyzerUnderTest = new DanishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
}
/** Stress test: runs {@code checkRandomData} on a new GermanAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final GermanAnalyzer analyzerUnderTest = new GermanAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -45,4 +46,9 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with delight.txt, using the delighttestdata.zip data file.
  final String vocabularyFile = "delight.txt";
  assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with deminimal.txt, using the deminimaltestdata.zip data file.
  final String vocabularyFile = "deminimal.txt";
  assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.junit.Ignore;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,20 +37,25 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
// Analyzer used by the tests in this class: KeywordTokenizer -> LowerCaseFilter -> GermanStemFilter.
Analyzer analyzer = new ReusableAnalyzerBase() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new KeywordTokenizer(reader);
    final LowerCaseFilter lowerCased = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
    return new TokenStreamComponents(source, new GermanStemFilter(lowerCased));
  }
};
public void testStemming() throws Exception {
Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer t = new KeywordTokenizer(reader);
return new TokenStreamComponents(t,
new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
}
};
/**
 * Checks the analyzer against the vocabulary in the class-path resource {@code data.txt}.
 * Fails with a clear message when the resource is missing, and closes the stream even
 * when the vocabulary assertion fails.
 */
public void testStemming() throws Exception {
  InputStream vocOut = getClass().getResourceAsStream("data.txt");
  // getResourceAsStream returns null (rather than throwing) when the resource is absent.
  assertNotNull("test resource data.txt not found", vocOut);
  try {
    assertVocabulary(analyzer, vocOut);
  } finally {
    vocOut.close();
  }
}
/**
 * Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for
 * 10000 * RANDOM_MULTIPLIER iterations. Currently disabled via {@code @Ignore}.
 */
@Ignore("bugs!")
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
}
}
/** Stress test: runs {@code checkRandomData} on a new GreekAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final GreekAnalyzer analyzerUnderTest = new GreekAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");
}
/** Stress test: runs {@code checkRandomData} on a new EnglishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final EnglishAnalyzer analyzerUnderTest = new EnglishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious");
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -36,21 +36,21 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
public class TestPorterStemFilter extends BaseTokenStreamTestCase {
public class TestPorterStemFilter extends BaseTokenStreamTestCase {
// Analyzer used by the tests in this class: KeywordTokenizer -> PorterStemFilter.
Analyzer a = new ReusableAnalyzerBase() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new KeywordTokenizer(reader);
    final PorterStemFilter stemmer = new PorterStemFilter(source);
    return new TokenStreamComponents(source, stemmer);
  }
};
/**
 * Runs the Porter stemmer over the strings in voc.txt and checks the results
 * against the strings in output.txt; both names are passed to
 * {@code assertVocabulary} together with the porterTestData.zip data file.
 */
public void testPorterStemFilter() throws Exception {
// NOTE(review): this local analyzer has the same definition as the class-level
// field `a` and shadows it here -- confirm whether the local is still needed.
Analyzer a = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer t = new KeywordTokenizer(reader);
return new TokenStreamComponents(t, new PorterStemFilter(t));
}
};
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
@ -61,4 +61,9 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
/** Stress test: runs {@code checkRandomData} on this class's analyzer {@code a} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, a, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");
}
/** Stress test: runs {@code checkRandomData} on a new SpanishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final SpanishAnalyzer analyzerUnderTest = new SpanishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with eslight.txt, using the eslighttestdata.zip data file.
  final String vocabularyFile = "eslight.txt";
  assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi");
}
/** Stress test: runs {@code checkRandomData} on a new BasqueAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final BasqueAnalyzer analyzerUnderTest = new BasqueAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
/** Stress test: runs {@code checkRandomData} on a new PersianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final PersianAnalyzer analyzerUnderTest = new PersianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
}
/** Stress test: runs {@code checkRandomData} on a new FinnishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final FinnishAnalyzer analyzerUnderTest = new FinnishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with filight.txt, using the filighttestdata.zip data file.
  final String vocabularyFile = "filight.txt";
  assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { });
}
/** Stress test: runs {@code checkRandomData} on a new FrenchAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final FrenchAnalyzer analyzerUnderTest = new FrenchAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with frlight.txt, using the frlighttestdata.zip data file.
  final String vocabularyFile = "frlight.txt";
  assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with frminimal.txt, using the frminimaltestdata.zip data file.
  final String vocabularyFile = "frminimal.txt";
  assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond");
}
/** Stress test: runs {@code checkRandomData} on a new GalicianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final GalicianAnalyzer analyzerUnderTest = new GalicianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}
/** Stress test: runs {@code checkRandomData} on a new HindiAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final HindiAnalyzer analyzerUnderTest = new HindiAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
}
/** Stress test: runs {@code checkRandomData} on a new HungarianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final HungarianAnalyzer analyzerUnderTest = new HungarianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ");
}
/** Stress test: runs {@code checkRandomData} on a new ArmenianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final ArmenianAnalyzer analyzerUnderTest = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "peledakan", "peledakan");
checkOneTermReuse(a, "pembunuhan", "bunuh");
}
/** Stress test: runs {@code checkRandomData} on a new IndonesianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final IndonesianAnalyzer analyzerUnderTest = new IndonesianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");
}
/** Stress test: runs {@code checkRandomData} on a new ItalianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final ItalianAnalyzer analyzerUnderTest = new ItalianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -45,4 +45,9 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with itlight.txt, using the itlighttestdata.zip data file.
  final String vocabularyFile = "itlight.txt";
  assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "tirgiem", "tirgiem");
checkOneTermReuse(a, "tirgus", "tirg");
}
/** Stress test: runs {@code checkRandomData} on a new LatvianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final LatvianAnalyzer analyzerUnderTest = new LatvianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -185,4 +185,9 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
/** Stress test: runs {@code checkRandomData} on a new DutchAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final DutchAnalyzer analyzerUnderTest = new DutchAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
}
/** Stress test: runs {@code checkRandomData} on a new NorwegianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final NorwegianAnalyzer analyzerUnderTest = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr");
}
/** Stress test: runs {@code checkRandomData} on a new PortugueseAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final PortugueseAnalyzer analyzerUnderTest = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with ptlight.txt, using the ptlighttestdata.zip data file.
  final String vocabularyFile = "ptlight.txt";
  assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with ptminimal.txt, using the ptminimaltestdata.zip data file.
  final String vocabularyFile = "ptminimal.txt";
  assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -66,4 +66,9 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with ptrslp.txt, using the ptrslptestdata.zip data file.
  final String vocabularyFile = "ptrslp.txt";
  assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "absenţa", "absenţa");
checkOneTermReuse(a, "absenţi", "absenţ");
}
/** Stress test: runs {@code checkRandomData} on a new RomanianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final RomanianAnalyzer analyzerUnderTest = new RomanianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
}
/** Stress test: runs {@code checkRandomData} on a new RussianAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final RussianAnalyzer analyzerUnderTest = new RussianAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -45,4 +45,9 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with rulight.txt, using the rulighttestdata.zip data file.
  final String vocabularyFile = "rulight.txt";
  assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
}
/** Stress test: runs {@code checkRandomData} on a new SwedishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final SwedishAnalyzer analyzerUnderTest = new SwedishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException {
  // Run the vocabulary assertion with svlight.txt, using the svlighttestdata.zip data file.
  final String vocabularyFile = "svlight.txt";
  assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), vocabularyFile);
}
/** Stress test: runs {@code checkRandomData} on this class's {@code analyzer} for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
}

View File

@ -142,5 +142,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
analyzer,
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
}
}
/** Stress test: runs {@code checkRandomData} on a new ThaiAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final ThaiAnalyzer analyzerUnderTest = new ThaiAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "ağacı", "ağacı");
checkOneTermReuse(a, "ağaç", "ağaç");
}
/** Stress test: runs {@code checkRandomData} on a new TurkishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final TurkishAnalyzer analyzerUnderTest = new TurkishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -195,4 +195,9 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
while (stream.incrementToken()) {
}
}
/** Stress test: runs {@code checkRandomData} on a new SmartChineseAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final SmartChineseAnalyzer analyzerUnderTest = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}

View File

@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "studenta", "studenta");
checkOneTermReuse(a, "studenci", "student");
}
/** Stress test: runs {@code checkRandomData} on a new PolishAnalyzer for 10000 * RANDOM_MULTIPLIER iterations. */
public void testRandomStrings() throws Exception {
  final PolishAnalyzer analyzerUnderTest = new PolishAnalyzer(TEST_VERSION_CURRENT);
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzerUnderTest, iterations);
}
}