LUCENE-2560: add basic stress tests for analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-04-23 16:55:15 +00:00
parent 2714ba90ca
commit 68061ef921
54 changed files with 331 additions and 23 deletions

View File

@ -19,11 +19,15 @@ package org.apache.lucene.analysis;
import java.io.StringReader; import java.io.StringReader;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/** /**
* Base class for all Lucene unit tests that use TokenStreams. * Base class for all Lucene unit tests that use TokenStreams.
@ -229,4 +233,39 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
assertAnalyzesToReuse(a, input, new String[]{expected}); assertAnalyzesToReuse(a, input, new String[]{expected});
} }
/**
 * Simple utility for blasting token streams with random data to make sure they
 * don't do anything crazy. Delegates to the four-argument overload with a
 * maximum word length of 20.
 *
 * @param random source of randomness used to generate the input text
 * @param a analyzer to exercise
 * @param iterations number of random strings to push through the analyzer
 * @throws IOException if the four-argument overload throws it
 */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
  final int defaultMaxWordLength = 20;
  checkRandomData(random, a, iterations, defaultMaxWordLength);
}
/**
 * Pushes randomly generated strings through an analyzer and verifies that analyzing the
 * same text a second time (via {@code assertAnalyzesToReuse}) produces the same terms.
 *
 * <p>For each iteration one of three {@code _TestUtil} generators is picked at random:
 * {@code randomSimpleString}, {@code randomRealisticUnicodeString} or
 * {@code randomUnicodeString}. Only the latter two receive {@code maxWordLength}.
 *
 * @param random source of randomness used to pick the generator and build the text
 * @param a analyzer to exercise; its reusable token stream is consumed for field "dummy"
 * @param iterations number of random strings to analyze
 * @param maxWordLength length bound handed to the Unicode string generators
 * @throws IOException if creating or consuming the token stream fails
 */
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
  for (int i = 0; i < iterations; i++) {
    String text;
    // NOTE(review): every value other than 0 and 1 lands in the default branch;
    // presumably nextInt's bounds are inclusive, which weights randomUnicodeString
    // more heavily - confirm against _TestUtil.
    switch (_TestUtil.nextInt(random, 0, 3)) {
      case 0:
        text = _TestUtil.randomSimpleString(random);
        break;
      case 1:
        text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
        break;
      default:
        text = _TestUtil.randomUnicodeString(random, maxWordLength);
    }

    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    List<String> tokens = new ArrayList<String>();
    ts.reset();
    while (ts.incrementToken()) {
      tokens.add(termAtt.toString());
      // TODO: we could collect offsets etc here for better checking that reset() really works.
    }
    // Finish the consumer workflow properly: end() must be called after the last
    // incrementToken() and before close(), otherwise the stream is handed back for
    // reuse without its end-of-stream processing having run.
    ts.end();
    ts.close();

    // verify reusing is "reproducible" and also get the normal tokenstream sanity checks
    if (!tokens.isEmpty()) {
      assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()]));
    }
  }
}
} }

View File

@ -111,4 +111,8 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a MockAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random);
  checkRandomData(random, mockAnalyzer, iterations);
}
} }

View File

@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through an ArabicAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  ArabicAnalyzer arabic = new ArabicAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, arabic, iterations);
}
} }

View File

@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" }); assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a BulgarianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  BulgarianAnalyzer bulgarian = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, bulgarian, iterations);
}
} }

View File

@ -157,4 +157,8 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, input, expected); checkOneTermReuse(a, input, expected);
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a BrazilianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  BrazilianAnalyzer brazilian = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, brazilian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "llengües", "llengües"); checkOneTermReuse(a, "llengües", "llengües");
checkOneTermReuse(a, "llengua", "llengu"); checkOneTermReuse(a, "llengua", "llengu");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a CatalanAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  CatalanAnalyzer catalan = new CatalanAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, catalan, iterations);
}
} }

View File

@ -270,4 +270,9 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) }); newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a CJKAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  CJKAnalyzer cjk = new CJKAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, cjk, iterations);
}
} }

View File

@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" }); "\ud801\udc1ctest" });
} }
/**
 * Stress test: runs 10000*RANDOM_MULTIPLIER random strings through each of
 * WhitespaceAnalyzer, SimpleAnalyzer and StopAnalyzer, in that order.
 */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;

  WhitespaceAnalyzer whitespace = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, whitespace, iterations);

  SimpleAnalyzer simple = new SimpleAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, simple, iterations);

  StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, stop, iterations);
}
} }
final class PayloadSetter extends TokenFilter { final class PayloadSetter extends TokenFilter {

View File

@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
dir.close(); dir.close();
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a ClassicAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  ClassicAnalyzer classic = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, classic, iterations);
}
} }

View File

@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
assertEquals(0, offsetAtt.startOffset()); assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset()); assertEquals(4, offsetAtt.endOffset());
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a KeywordAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  KeywordAnalyzer keyword = new KeywordAnalyzer();
  checkRandomData(random, keyword, iterations);
}
} }

View File

@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@ -219,4 +220,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
new String[] { "", "", "", "", "カタカナ" }, new String[] { "", "", "", "", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" }); new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a StandardAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  StandardAnalyzer standard = new StandardAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, standard, iterations);
}
} }

View File

@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
new String[] { "", "", "", "", "カタカナ" }, new String[] { "", "", "", "", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" }); new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's analyzer {@code a}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, a, iterations);
}
} }

View File

@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"}); assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a CzechAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  CzechAnalyzer czech = new CzechAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, czech, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "undersøgelse", "undersøgelse"); checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg"); checkOneTermReuse(a, "undersøg", "undersøg");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a DanishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  DanishAnalyzer danish = new DanishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, danish, iterations);
}
} }

View File

@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "Schaltflächen", "schaltflach"); checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech"); checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a GermanAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  GermanAnalyzer german = new GermanAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, german, iterations);
}
} }

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*; import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -45,4 +46,9 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt"); assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt"); assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.junit.Ignore;
import static org.apache.lucene.analysis.util.VocabularyAssert.*; import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,8 +37,6 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
* *
*/ */
public class TestGermanStemFilter extends BaseTokenStreamTestCase { public class TestGermanStemFilter extends BaseTokenStreamTestCase {
public void testStemming() throws Exception {
Analyzer analyzer = new ReusableAnalyzerBase() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
@ -48,8 +47,15 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
} }
}; };
public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt"); InputStream vocOut = getClass().getResourceAsStream("data.txt");
assertVocabulary(analyzer, vocOut); assertVocabulary(analyzer, vocOut);
vocOut.close(); vocOut.close();
} }
/**
 * Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's
 * {@code analyzer}. Currently disabled via {@code @Ignore}.
 */
@Ignore("bugs!")
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" }); new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a GreekAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  GreekAnalyzer greek = new GreekAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, greek, iterations);
}
} }

View File

@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "books", "books"); checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book"); checkOneTermReuse(a, "book", "book");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through an EnglishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  EnglishAnalyzer english = new EnglishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, english, iterations);
}
} }

View File

@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
checkOneTerm(analyzer, "congress", "congress"); checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious"); checkOneTerm(analyzer, "serious", "serious");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -37,11 +37,6 @@ import static org.apache.lucene.analysis.util.VocabularyAssert.*;
* Test the PorterStemFilter with Martin Porter's test data. * Test the PorterStemFilter with Martin Porter's test data.
*/ */
public class TestPorterStemFilter extends BaseTokenStreamTestCase { public class TestPorterStemFilter extends BaseTokenStreamTestCase {
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
Analyzer a = new ReusableAnalyzerBase() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
@ -51,6 +46,11 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
} }
}; };
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt"); assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
} }
@ -61,4 +61,9 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's analyzer {@code a}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, a, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "chicana", "chican"); checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano"); checkOneTermReuse(a, "chicano", "chicano");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a SpanishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  SpanishAnalyzer spanish = new SpanishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, spanish, iterations);
}
} }

View File

@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt"); assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "zaldiak", "zaldiak"); checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi"); checkOneTermReuse(a, "mendiari", "mendi");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a BasqueAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  BasqueAnalyzer basque = new BasqueAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, basque, iterations);
}
} }

View File

@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" }); "brown", "fox" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a PersianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  PersianAnalyzer persian = new PersianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, persian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään"); checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a FinnishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  FinnishAnalyzer finnish = new FinnishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, finnish, iterations);
}
} }

View File

@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt"); assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31); FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { }); assertAnalyzesTo(a, "Votre", new String[] { });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a FrenchAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  FrenchAnalyzer french = new FrenchAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, french, iterations);
}
} }

View File

@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt"); assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt"); assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "correspondente", "correspondente"); checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond"); checkOneTermReuse(a, "corresponderá", "correspond");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a GalicianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  GalicianAnalyzer galician = new GalicianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, galician, iterations);
}
} }

View File

@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
HindiAnalyzer.getDefaultStopSet(), exclusionSet); HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी"); checkOneTermReuse(a, "हिंदी", "हिंदी");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a HindiAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  HindiAnalyzer hindi = new HindiAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, hindi, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "babakocsi", "babakocsi"); checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs"); checkOneTermReuse(a, "babakocsijáért", "babakocs");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a HungarianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  HungarianAnalyzer hungarian = new HungarianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, hungarian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "արծիվներ", "արծիվներ"); checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ"); checkOneTermReuse(a, "արծիվ", "արծ");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through an ArmenianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  ArmenianAnalyzer armenian = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, armenian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "peledakan", "peledakan"); checkOneTermReuse(a, "peledakan", "peledakan");
checkOneTermReuse(a, "pembunuhan", "bunuh"); checkOneTermReuse(a, "pembunuhan", "bunuh");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through an IndonesianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  IndonesianAnalyzer indonesian = new IndonesianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, indonesian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "abbandonata", "abbandonata"); checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon"); checkOneTermReuse(a, "abbandonati", "abbandon");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through an ItalianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  ItalianAnalyzer italian = new ItalianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, italian, iterations);
}
} }

View File

@ -45,4 +45,9 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt"); assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "tirgiem", "tirgiem"); checkOneTermReuse(a, "tirgiem", "tirgiem");
checkOneTermReuse(a, "tirgus", "tirg"); checkOneTermReuse(a, "tirgus", "tirg");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a LatvianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  LatvianAnalyzer latvian = new LatvianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, latvian, iterations);
}
} }

View File

@ -185,4 +185,9 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected); checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a DutchAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  DutchAnalyzer dutch = new DutchAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, dutch, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene"); checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a NorwegianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  NorwegianAnalyzer norwegian = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, norwegian, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "quilométricas", "quilométricas"); checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr"); checkOneTermReuse(a, "quilométricos", "quilométr");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a PortugueseAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  PortugueseAnalyzer portuguese = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, portuguese, iterations);
}
} }

View File

@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt"); assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt"); assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -66,4 +66,9 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt"); assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "absenţa", "absenţa"); checkOneTermReuse(a, "absenţa", "absenţa");
checkOneTermReuse(a, "absenţi", "absenţ"); checkOneTermReuse(a, "absenţi", "absenţ");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a RomanianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  RomanianAnalyzer romanian = new RomanianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, romanian, iterations);
}
} }

View File

@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a RussianAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  RussianAnalyzer russian = new RussianAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, russian, iterations);
}
} }

View File

@ -45,4 +45,9 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt"); assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne"); checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a SwedishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  SwedishAnalyzer swedish = new SwedishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, swedish, iterations);
}
} }

View File

@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase {
public void testVocabulary() throws IOException { public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt"); assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through this test's {@code analyzer}. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  checkRandomData(random, analyzer, iterations);
}
} }

View File

@ -143,4 +143,9 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a ThaiAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  ThaiAnalyzer thai = new ThaiAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, thai, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "ağacı", "ağacı"); checkOneTermReuse(a, "ağacı", "ağacı");
checkOneTermReuse(a, "ağaç", "ağaç"); checkOneTermReuse(a, "ağaç", "ağaç");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a TurkishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  TurkishAnalyzer turkish = new TurkishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, turkish, iterations);
}
} }

View File

@ -195,4 +195,9 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
while (stream.incrementToken()) { while (stream.incrementToken()) {
} }
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a SmartChineseAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  SmartChineseAnalyzer smartChinese = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, smartChinese, iterations);
}
} }

View File

@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "studenta", "studenta"); checkOneTermReuse(a, "studenta", "studenta");
checkOneTermReuse(a, "studenci", "student"); checkOneTermReuse(a, "studenci", "student");
} }
/** Stress test: runs 10000*RANDOM_MULTIPLIER random strings through a PolishAnalyzer. */
public void testRandomStrings() throws Exception {
  final int iterations = 10000 * RANDOM_MULTIPLIER;
  PolishAnalyzer polish = new PolishAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, polish, iterations);
}
} }