LUCENE-3894: for tokenizers, add some tests for larger documents

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1303258 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-03-21 02:54:07 +00:00
parent 85bba7eed7
commit dd7bfc78d9
6 changed files with 42 additions and 1 deletion


@@ -295,7 +295,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
   public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
-    checkRandomData(random, a, iterations, false);
+    checkRandomData(random, a, iterations, 20, false);
   }
+
+  /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
+  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+    checkRandomData(random, a, iterations, maxWordLength, false);
+  }
 
   /**
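
For context, here is a minimal sketch (not part of this commit) of how a tokenizer test is expected to call the new checkRandomData overload. The example class name and the MockAnalyzer setup are illustrative assumptions; only the checkRandomData(random, a, iterations, maxWordLength) signature and the 200*RANDOM_MULTIPLIER / 8192 values mirror what this commit actually adds below:

  // Hypothetical example test; only the four-argument checkRandomData overload
  // comes from this commit. MockAnalyzer stands in for the analyzer under test.
  package org.apache.lucene.analysis;

  public class TestRandomHugeStringsExample extends BaseTokenStreamTestCase {

    /** blast some random large strings through an analyzer (here a MockAnalyzer) */
    public void testRandomHugeStrings() throws Exception {
      Analyzer a = new MockAnalyzer(random);
      // 200 iterations of random text whose individual words may be up to 8192 chars
      checkRandomData(random, a, 200 * RANDOM_MULTIPLIER, 8192);
    }
  }

Note that the existing three-argument overload now delegates with a maxWordLength of 20, so current callers keep short words by default, while the new tests below opt into much larger words (8192 chars) to exercise tokenizers on bigger documents.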


@@ -236,4 +236,9 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }


@@ -59,4 +59,14 @@ public class TestExtendedMode extends BaseTokenStreamTestCase {
       }
     }
   }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }


@@ -127,6 +127,14 @@ public class TestKuromojiAnalyzer extends BaseTokenStreamTestCase {
                                             KuromojiAnalyzer.getDefaultStopTags());
     checkRandomData(random, a, atLeast(10000));
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    final Analyzer a = new KuromojiAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
+                                            KuromojiAnalyzer.getDefaultStopSet(),
+                                            KuromojiAnalyzer.getDefaultStopTags());
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   // Copied from TestKuromojiTokenizer, to make sure passing
   // user dict to analyzer works:


@@ -41,6 +41,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
 
 public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
@@ -190,6 +191,13 @@ public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzerNoPunct, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  @Ignore("FIXME: see LUCENE-3897")
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   public void testLargeDocReliability() throws Exception {
     for (int i = 0; i < 100; i++) {
       String s = _TestUtil.randomUnicodeString(random, 10000);


@@ -223,4 +223,9 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }