mirror of https://github.com/apache/lucene.git
LUCENE-3894: add large docs tests for more tokenizers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1303273 13f79535-47bb-0310-9956-ffa450edef68
parent dd7bfc78d9
commit 1156de050f
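All of the hunks below follow one pattern: each test class gains a testRandomHugeStrings (or testRandomHuge) variant that calls the four-argument checkRandomData overload from BaseTokenStreamTestCase, trading iteration count (200*RANDOM_MULTIPLIER instead of 10000*RANDOM_MULTIPLIER) for much larger generated text, with the fourth argument (8192) bounding the length of the random strings. A minimal sketch of that pattern, distilled from the hunks below (the MockTokenizer here is a stand-in for whichever tokenizer a given test actually exercises, and the comments are editorial, not from the commit):

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // swap in the tokenizer under test (PathHierarchyTokenizer, WikipediaTokenizer, ...)
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };
    // fewer rounds than the plain random test, but each round feeds much larger text
    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
  }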
@@ -511,6 +511,25 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzer, numRounds);
   }
 
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+
+      @Override
+      protected Reader initReader(Reader reader) {
+        return new HTMLStripCharFilter(CharReader.get(reader));
+      }
+    };
+
+    int numRounds = RANDOM_MULTIPLIER * 200;
+    checkRandomData(random, analyzer, numRounds, 8192);
+  }
+
   public void testServerSideIncludes() throws Exception {
     String test = "one<img src=\"image.png\"\n"
       + " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"
@@ -272,4 +272,9 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -214,6 +214,13 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
     checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
 
 final class PayloadSetter extends TokenFilter {
@@ -316,4 +316,9 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -242,4 +242,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -474,4 +474,9 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -208,4 +208,16 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -170,4 +170,16 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -1144,4 +1144,16 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
+      }
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -428,6 +428,31 @@ public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
     }
   }
 
+  /** simple random test like testRandom2, but for large docs
+   */
+  public void testRandomHuge() throws Exception {
+    final int numIters = atLeast(10);
+    for (int i = 0; i < numIters; i++) {
+      b = new SynonymMap.Builder(random.nextBoolean());
+      final int numEntries = atLeast(10);
+      for (int j = 0; j < numEntries; j++) {
+        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+      }
+      final SynonymMap map = b.build();
+      final boolean ignoreCase = random.nextBoolean();
+
+      final Analyzer analyzer = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
+        }
+      };
+
+      checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
+    }
+  }
+
   // LUCENE-3375
   public void testVanishingTerms() throws Exception {
     String testFile =
@@ -167,6 +167,11 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
     checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   // LUCENE-3044
   public void testAttributeReuse() throws Exception {
     ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
@@ -185,4 +185,17 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new Analyzer() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WikipediaTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }