From 1b9f060e25f09445e6f60956793d049dfca7a774 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Sat, 10 Jun 2017 10:30:54 +0200 Subject: [PATCH] LUCENE-7866: Exclude DelimitedTermFrequencyTokenFilter from random data tests in random chains and factory tests --- .../lucene/analysis/core/TestFactories.java | 46 +++++++++++++------ .../analysis/core/TestRandomChains.java | 3 ++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java index 956a3941b78..499774e5999 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java @@ -21,13 +21,17 @@ import java.io.IOException; import java.io.Reader; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; import org.apache.lucene.analysis.util.CharFilterFactory; import org.apache.lucene.analysis.util.MultiTermAwareComponent; @@ -49,6 +53,12 @@ import org.apache.lucene.util.Version; // TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer public class TestFactories extends BaseTokenStreamTestCase { + + /** Factories that are excluded from testing it with random data */ + private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList( + DelimitedTermFrequencyTokenFilterFactory.class + )); + 
public void test() throws IOException { for (String tokenizer : TokenizerFactory.availableTokenizers()) { doTestTokenizer(tokenizer); @@ -77,11 +87,13 @@ public class TestFactories extends BaseTokenStreamTestCase { assertFalse(mtc instanceof CharFilterFactory); } - // beast it just a little, it shouldnt throw exceptions: - // (it should have thrown them in initialize) - Analyzer a = new FactoryAnalyzer(factory, null, null); - checkRandomData(random(), a, 20, 20, false, false); - a.close(); + if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) { + // beast it just a little, it shouldnt throw exceptions: + // (it should have thrown them in initialize) + Analyzer a = new FactoryAnalyzer(factory, null, null); + checkRandomData(random(), a, 20, 20, false, false); + a.close(); + } } } @@ -99,11 +111,13 @@ public class TestFactories extends BaseTokenStreamTestCase { assertTrue(mtc instanceof TokenFilterFactory); } - // beast it just a little, it shouldnt throw exceptions: - // (it should have thrown them in initialize) - Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null); - checkRandomData(random(), a, 20, 20, false, false); - a.close(); + if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) { + // beast it just a little, it shouldnt throw exceptions: + // (it should have thrown them in initialize) + Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null); + checkRandomData(random(), a, 20, 20, false, false); + a.close(); + } } } @@ -121,11 +135,13 @@ public class TestFactories extends BaseTokenStreamTestCase { assertTrue(mtc instanceof CharFilterFactory); } - // beast it just a little, it shouldnt throw exceptions: - // (it should have thrown them in initialize) - Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory); - checkRandomData(random(), a, 20, 20, false, false); - a.close(); + if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) { + // beast it just a little, it shouldnt throw 
exceptions: + // (it should have thrown them in initialize) + Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory); + checkRandomData(random(), a, 20, 20, false, false); + a.close(); + } } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index 34c31d2dc58..0162ac74687 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -73,6 +73,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter; +import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter; import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter; import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter; import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter; @@ -159,6 +160,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { WordDelimiterFilter.class, // Cannot correct offsets when a char filter had changed them: WordDelimiterGraphFilter.class, + // requires a special encoded token value, so it may fail with random data: + DelimitedTermFrequencyTokenFilter.class, // clones of core's filters: org.apache.lucene.analysis.core.StopFilter.class, org.apache.lucene.analysis.core.LowerCaseFilter.class)) {