LUCENE-3894: add large docs tests for more tokenizers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1303273 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-03-21 03:59:14 +00:00
parent dd7bfc78d9
commit 1156de050f
12 changed files with 125 additions and 0 deletions
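All twelve files follow one pattern: next to each existing testRandomStrings test, which runs 10000*RANDOM_MULTIPLIER iterations over ordinary random strings, the commit adds a testRandomHugeStrings (or testRandomHuge) variant that calls the four-argument checkRandomData overload with far fewer iterations (200*RANDOM_MULTIPLIER) but a maximum word length of 8192 characters, so each tokenizer is exercised against very large documents. For readers without the Lucene test framework at hand, here is a rough, self-contained sketch of what such a check amounts to; the real logic lives in BaseTokenStreamTestCase.checkRandomData, and the class and helper names below (HugeStringSmokeTest, blastHugeStrings, randomText) are hypothetical stand-ins, not framework API:

// Rough, self-contained approximation of the checkRandomData(random, analyzer,
// iterations, maxWordLength) calls added in this commit. Names here are
// hypothetical stand-ins, not Lucene test-framework API.
import java.io.StringReader;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class HugeStringSmokeTest {

  /** Feed `iterations` random strings of up to `maxLength` chars through the analyzer. */
  static void blastHugeStrings(Analyzer analyzer, int iterations, int maxLength) throws Exception {
    Random random = new Random();
    for (int i = 0; i < iterations; i++) {
      String text = randomText(random, maxLength); // the commit uses maxWordLength = 8192
      TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // Buffer-handling bugs on large inputs typically surface here as an
        // exception, a hang, or a corrupt term; force the term buffer to be read.
        term.toString();
      }
      ts.end();
      ts.close();
    }
  }

  /** Random text mixing long letter runs with occasional whitespace. */
  static String randomText(Random random, int maxLength) {
    int len = 1 + random.nextInt(maxLength);
    StringBuilder sb = new StringBuilder(len);
    for (int j = 0; j < len; j++) {
      sb.append(random.nextInt(10) == 0 ? ' ' : (char) ('a' + random.nextInt(26)));
    }
    return sb.toString();
  }
}

The real helper is considerably stricter (among other things it validates offsets, position increments, and analyzer reuse across passes), but the sketch shows why trading 10000 iterations for 200 at maxWordLength 8192 shifts the stress from many small inputs to a few very large ones.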

View File

@@ -511,6 +511,25 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
    checkRandomData(random, analyzer, numRounds);
  }

  public void testRandomHugeStrings() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }

      @Override
      protected Reader initReader(Reader reader) {
        return new HTMLStripCharFilter(CharReader.get(reader));
      }
    };
    int numRounds = RANDOM_MULTIPLIER * 200;
    checkRandomData(random, analyzer, numRounds, 8192);
  }

  public void testServerSideIncludes() throws Exception {
    String test = "one<img src=\"image.png\"\n"
                + " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"

View File

@@ -272,4 +272,9 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -214,6 +214,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
    checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
    checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
    checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
    checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
  }
}

final class PayloadSetter extends TokenFilter {

View File

@@ -316,4 +316,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -242,4 +242,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -474,4 +474,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -208,4 +208,16 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
    };
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -170,4 +170,16 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
    };
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -1144,4 +1144,16 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
    };
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
      }
    };
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }
}

View File

@@ -428,6 +428,31 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
    }
  }

  /** simple random test like testRandom2, but for large docs */
  public void testRandomHuge() throws Exception {
    final int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
      b = new SynonymMap.Builder(random.nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random.nextBoolean();

      final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
        }
      };

      checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
    }
  }

  // LUCENE-3375
  public void testVanishingTerms() throws Exception {
    String testFile =

View File

@@ -167,6 +167,11 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
  }

  // LUCENE-3044
  public void testAttributeReuse() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

View File

@@ -185,4 +185,17 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
    };
    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new WikipediaTokenizer(reader);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }
}