LUCENE-3894: for tokenizers, add some tests for larger documents

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1303258 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-03-21 02:54:07 +00:00
parent 85bba7eed7
commit dd7bfc78d9
6 changed files with 42 additions and 1 deletion


@@ -295,7 +295,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
   public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
-    checkRandomData(random, a, iterations, false);
+    checkRandomData(random, a, iterations, 20, false);
   }
+
+  /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
+  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+    checkRandomData(random, a, iterations, maxWordLength, false);
+  }
 
   /**
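
For context, here is a minimal sketch (not part of this commit) of how a tokenizer test is expected to call the new checkRandomData overload. The example class name and the MockAnalyzer setup are illustrative assumptions; only the checkRandomData(random, a, iterations, maxWordLength) signature and the 200*RANDOM_MULTIPLIER / 8192 values mirror what this commit actually adds below:

  // Hypothetical example test; only the four-argument checkRandomData overload
  // comes from this commit. MockAnalyzer stands in for the analyzer under test.
  package org.apache.lucene.analysis;

  public class TestRandomHugeStringsExample extends BaseTokenStreamTestCase {

    /** blast some random large strings through an analyzer (here a MockAnalyzer) */
    public void testRandomHugeStrings() throws Exception {
      Analyzer a = new MockAnalyzer(random);
      // 200 iterations of random text whose individual words may be up to 8192 chars
      checkRandomData(random, a, 200 * RANDOM_MULTIPLIER, 8192);
    }
  }

Note that the existing three-argument overload now delegates with a maxWordLength of 20, so current callers keep short words by default, while the new tests below opt into much larger words (8192 chars) to exercise tokenizers on bigger documents.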


@@ -236,4 +236,9 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }


@@ -59,4 +59,14 @@ public class TestExtendedMode extends BaseTokenStreamTestCase {
       }
     }
   }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }


@@ -127,6 +127,14 @@ public class TestKuromojiAnalyzer extends BaseTokenStreamTestCase {
                                             KuromojiAnalyzer.getDefaultStopTags());
     checkRandomData(random, a, atLeast(10000));
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    final Analyzer a = new KuromojiAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
+                                            KuromojiAnalyzer.getDefaultStopSet(),
+                                            KuromojiAnalyzer.getDefaultStopTags());
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   // Copied from TestKuromojiTokenizer, to make sure passing
   // user dict to analyzer works:


@@ -41,6 +41,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
 
 public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
@@ -190,6 +191,13 @@ public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzerNoPunct, 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  @Ignore("FIXME: see LUCENE-3897")
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   public void testLargeDocReliability() throws Exception {
     for (int i = 0; i < 100; i++) {
       String s = _TestUtil.randomUnicodeString(random, 10000);


@@ -223,4 +223,9 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }