From 34adebab3b881917d8cc492fdef6b7560a286a9b Mon Sep 17 00:00:00 2001
From: Uwe Schindler
Date: Sun, 22 Sep 2013 13:57:43 +0000
Subject: [PATCH] LUCENE-5235: Tokenizers now throw an IllegalStateException
 if the consumer does not call reset() before consuming the stream. Previous
 versions threw NullPointerException or ArrayIndexOutOfBoundsException on a
 best-effort basis, which was not user-friendly.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1525362 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt | 11 ++
 .../analysis/core/KeywordTokenizer.java | 1 +
 .../ngram/Lucene43NGramTokenizer.java | 3 +-
 .../analysis/pattern/PatternTokenizer.java | 1 +
 .../analysis/standard/ClassicTokenizer.java | 9 +-
 .../analysis/standard/StandardTokenizer.java | 9 +-
 .../standard/UAX29URLEmailTokenizer.java | 11 +-
 .../lucene/analysis/util/CharTokenizer.java | 4 +-
 .../wikipedia/WikipediaTokenizer.java | 11 +-
 .../analysis/ar/TestArabicAnalyzer.java | 8 +-
 .../ar/TestArabicNormalizationFilter.java | 2 +-
 .../analysis/ar/TestArabicStemFilter.java | 2 +-
 .../analysis/bg/TestBulgarianAnalyzer.java | 4 +-
 .../analysis/bg/TestBulgarianStemmer.java | 2 +-
 .../analysis/br/TestBrazilianStemmer.java | 4 +-
 .../analysis/ca/TestCatalanAnalyzer.java | 8 +-
 .../lucene/analysis/cjk/TestCJKAnalyzer.java | 6 +-
 .../analysis/cjk/TestCJKWidthFilter.java | 2 +-
 .../compound/TestCompoundWordTokenFilter.java | 4 +-
 .../lucene/analysis/cz/TestCzechAnalyzer.java | 4 +-
 .../lucene/analysis/cz/TestCzechStemmer.java | 2 +-
 .../analysis/da/TestDanishAnalyzer.java | 8 +-
 .../analysis/de/TestGermanAnalyzer.java | 12 +-
 .../de/TestGermanLightStemFilter.java | 2 +-
 .../de/TestGermanMinimalStemFilter.java | 2 +-
 .../de/TestGermanNormalizationFilter.java | 2 +-
 .../analysis/de/TestGermanStemFilter.java | 2 +-
 .../lucene/analysis/el/GreekAnalyzerTest.java | 6 +-
 .../lucene/analysis/el/TestGreekStemmer.java | 2 +-
 .../analysis/en/TestEnglishAnalyzer.java | 14 +-
 .../en/TestEnglishMinimalStemFilter.java | 2 +-
 .../lucene/analysis/en/TestKStemmer.java | 2 +-
 .../analysis/en/TestPorterStemFilter.java | 2 +-
 .../analysis/es/TestSpanishAnalyzer.java | 8 +-
 .../es/TestSpanishLightStemFilter.java | 2 +-
 .../analysis/eu/TestBasqueAnalyzer.java | 8 +-
 .../analysis/fa/TestPersianAnalyzer.java | 4 +-
 .../fa/TestPersianNormalizationFilter.java | 2 +-
 .../analysis/fi/TestFinnishAnalyzer.java | 8 +-
 .../fi/TestFinnishLightStemFilter.java | 2 +-
 .../analysis/fr/TestFrenchAnalyzer.java | 10 +-
 .../fr/TestFrenchLightStemFilter.java | 2 +-
 .../fr/TestFrenchMinimalStemFilter.java | 2 +-
 .../lucene/analysis/ga/TestIrishAnalyzer.java | 8 +-
 .../analysis/ga/TestIrishLowerCaseFilter.java | 2 +-
 .../analysis/gl/TestGalicianAnalyzer.java | 8 +-
 .../gl/TestGalicianMinimalStemFilter.java | 2 +-
 .../analysis/gl/TestGalicianStemFilter.java | 2 +-
 .../lucene/analysis/hi/TestHindiAnalyzer.java | 6 +-
 .../analysis/hi/TestHindiNormalizer.java | 2 +-
 .../lucene/analysis/hi/TestHindiStemmer.java | 2 +-
 .../analysis/hu/TestHungarianAnalyzer.java | 8 +-
 .../hu/TestHungarianLightStemFilter.java | 2 +-
 .../hunspell/HunspellStemFilterTest.java | 2 +-
 .../analysis/hy/TestArmenianAnalyzer.java | 8 +-
 .../analysis/id/TestIndonesianAnalyzer.java | 8 +-
 .../analysis/id/TestIndonesianStemmer.java | 126 +++++++++---------
 .../analysis/in/TestIndicNormalizer.java | 2 +-
 .../analysis/it/TestItalianAnalyzer.java | 8 +-
 .../it/TestItalianLightStemFilter.java | 2 +-
 .../analysis/lv/TestLatvianAnalyzer.java | 8 +-
 .../analysis/lv/TestLatvianStemmer.java | 2 +-
 .../miscellaneous/TestASCIIFoldingFilter.java | 2 +-
 .../TestCapitalizationFilter.java | 2 +-
 .../TestHyphenatedWordsFilter.java | 2 +-
 .../miscellaneous/TestLengthFilter.java | 2 +-
 .../TestRemoveDuplicatesTokenFilter.java | 2 +-
 .../miscellaneous/TestTrimFilter.java | 2 +-
 .../ngram/EdgeNGramTokenFilterTest.java | 1 -
 .../lucene/analysis/nl/TestDutchStemmer.java | 16 +--
 .../analysis/no/TestNorwegianAnalyzer.java | 8 +-
 .../no/TestNorwegianLightStemFilter.java | 2 +-
 .../no/TestNorwegianMinimalStemFilter.java | 2 +-
 .../pattern/TestPatternReplaceFilter.java | 2 +-
 .../DelimitedPayloadTokenFilterTest.java | 14 +-
 .../analysis/pt/TestPortugueseAnalyzer.java | 8 +-
 .../pt/TestPortugueseLightStemFilter.java | 2 +-
 .../pt/TestPortugueseMinimalStemFilter.java | 2 +-
 .../analysis/pt/TestPortugueseStemFilter.java | 2 +-
 .../reverse/TestReverseStringFilter.java | 2 +-
 .../analysis/ro/TestRomanianAnalyzer.java | 8 +-
 .../analysis/ru/TestRussianAnalyzer.java | 6 +-
 .../ru/TestRussianLightStemFilter.java | 2 +-
 .../shingle/ShingleAnalyzerWrapperTest.java | 26 ++--
 .../analysis/shingle/ShingleFilterTest.java | 2 +-
 .../analysis/snowball/TestSnowball.java | 2 +-
 .../analysis/sv/TestSwedishAnalyzer.java | 8 +-
 .../sv/TestSwedishLightStemFilter.java | 2 +-
 .../lucene/analysis/th/TestThaiAnalyzer.java | 8 +-
 .../analysis/tr/TestTurkishAnalyzer.java | 8 +-
 .../tr/TestTurkishLowerCaseFilter.java | 2 +-
 .../lucene/analysis/util/TestElision.java | 2 +-
 .../icu/segmentation/ICUTokenizer.java | 3 +-
 .../analysis/icu/TestICUFoldingFilter.java | 2 +-
 .../icu/TestICUNormalizer2Filter.java | 2 +-
 .../analysis/icu/TestICUTransformFilter.java | 2 +-
 .../icu/segmentation/TestICUTokenizer.java | 2 +-
 .../segmentation/TestWithCJKBigramFilter.java | 4 +-
 .../lucene/analysis/ja/JapaneseTokenizer.java | 9 +-
 .../ja/TestJapaneseBaseFormFilter.java | 2 +-
 .../ja/TestJapaneseKatakanaStemFilter.java | 2 +-
 .../ja/TestJapaneseReadingFormFilter.java | 2 +-
 .../morfologik/TestMorfologikAnalyzer.java | 28 ++--
 .../phonetic/DoubleMetaphoneFilterTest.java | 2 +-
 .../phonetic/TestBeiderMorseFilter.java | 2 +-
 .../TestDoubleMetaphoneFilterFactory.java | 21 ---
 .../analysis/phonetic/TestPhoneticFilter.java | 2 +-
 .../analysis/cn/smart/SentenceTokenizer.java | 1 +
 .../cn/smart/TestSmartChineseAnalyzer.java | 6 +-
 .../analysis/pl/TestPolishAnalyzer.java | 8 +-
 .../analysis/uima/BaseUIMATokenizer.java | 1 +
 .../org/apache/lucene/analysis/Tokenizer.java | 56 +++++---
 .../lucene/analysis/TestGraphTokenizers.java | 3 +-
 .../lucene/analysis/TestMockAnalyzer.java | 14 +-
 .../apache/lucene/index/TestIndexWriter.java | 17 +--
 .../lucene/search/TestTermRangeQuery.java | 3 +-
 .../highlight/OffsetLimitTokenFilterTest.java | 2 +-
 .../vectorhighlight/AbstractTestCase.java | 3 +-
 .../classic/TestMultiPhraseQueryParsing.java | 1 +
 .../analysis/BaseTokenStreamTestCase.java | 52 ++++----
 .../lucene/analysis/VocabularyAssert.java | 4 +-
 .../solr/analysis/TrieTokenizerFactory.java | 6 +-
 .../highlight/DefaultSolrHighlighter.java | 5 +
 .../org/apache/solr/schema/BoolField.java | 1 +
 .../apache/solr/schema/PreAnalyzedField.java | 2 +
 125 files changed, 458 insertions(+), 381 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c2c8c224bc3..611b80b4db0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -74,6 +74,12 @@ New Features
 * LUCENE-5219: Add support to SynonymFilterFactory for custom parsers.
   (Ryan Ernst via Robert Muir)
 
+* LUCENE-5235: Tokenizers now throw an IllegalStateException if the
+  consumer does not call reset() before consuming the stream. Previous
+  versions threw NullPointerException or ArrayIndexOutOfBoundsException
+  on a best-effort basis, which was not user-friendly.
+  (Uwe Schindler, Robert Muir)
+
 Bug Fixes
 
 * LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead
@@ -94,6 +100,11 @@ Documentation
 
 Changes in backwards compatibility policy
 
+* LUCENE-5235: Subclasses of Tokenizer have to call super.reset()
+  when implementing reset(). Otherwise the consumer will get an
+  IllegalStateException because the Reader is not correctly assigned.
+  (Uwe Schindler, Robert Muir)
+
 * LUCENE-5204: Directory doesn't have default implementations for
   LockFactory-related methods, which have been moved to BaseDirectory. If you
   had a custom Directory implementation that extended Directory, you need to
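For context, the consumer workflow that these two entries refer to now has a hard contract: reset() must be called before the first incrementToken(). The following is a minimal sketch of that workflow, not code from this patch; it assumes a Lucene 4.5-era classpath, and WhitespaceTokenizer merely stands in for any Tokenizer:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ConsumerContractSketch {
      public static void main(String[] args) throws IOException {
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_45, new StringReader("hello world"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                    // mandatory first step; skipping it now throws IllegalStateException
        while (ts.incrementToken()) {  // consume token by token
          System.out.println(term.toString());
        }
        ts.end();                      // record end-of-stream state (e.g. final offset)
        ts.close();                    // release the Reader
      }
    }

Before this change, skipping reset() surfaced, at best, as a NullPointerException or ArrayIndexOutOfBoundsException from deep inside the tokenizer implementation.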
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
index 29239fe327b..eaaf693a911 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@@ -88,6 +88,7 @@ public final class KeywordTokenizer extends Tokenizer {
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     this.done = false;
   }
 }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
index 25693da23ba..823bb012469 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
@@ -140,7 +140,8 @@ public final class Lucene43NGramTokenizer extends Tokenizer {
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(charsRead);
     this.offsetAtt.setOffset(finalOffset, finalOffset);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
index 08f463003d2..c5ca0500e5e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
@@ -138,6 +138,7 @@ public final class PatternTokenizer extends Tokenizer {
 
   @Override
   public void reset() throws IOException {
+    super.reset();
    fillBuffer(str, input);
     matcher.reset(str);
     index = 0;
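The hunks above all apply the rule from the backwards-compatibility entry: a Tokenizer subclass must delegate to super.reset() before reinitializing its own state, and where end() is overridden (as in Lucene43NGramTokenizer), delegate to super.end() first as well. A hedged sketch of a conforming subclass follows; SingleCharTokenizer is hypothetical and not part of this patch:

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    public final class SingleCharTokenizer extends Tokenizer {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
      private int pos = 0;

      public SingleCharTokenizer(Reader input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        clearAttributes();
        int c = input.read(); // without reset(), reading 'input' now fails with IllegalStateException
        if (c == -1) {
          return false;
        }
        termAtt.setEmpty().append((char) c); // emit one single-character token
        offsetAtt.setOffset(correctOffset(pos), correctOffset(pos + 1));
        pos++;
        return true;
      }

      @Override
      public void end() throws IOException {
        super.end(); // delegate first, as Lucene43NGramTokenizer now does
        final int finalOffset = correctOffset(pos);
        offsetAtt.setOffset(finalOffset, finalOffset);
      }

      @Override
      public void reset() throws IOException {
        super.reset(); // required: assigns the pending Reader to 'input'
        pos = 0;       // only then reinitialize local state
      }
    }

Failing to call super.reset() leaves the Reader unassigned, which is exactly the IllegalStateException case the CHANGES entry warns about.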
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
index fe1baa30203..729fd5c11bb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
@@ -114,7 +114,7 @@ public final class ClassicTokenizer extends Tokenizer {
   }
 
   private void init(Version matchVersion) {
-    this.scanner = new ClassicTokenizerImpl(null); // best effort NPE if you dont call reset
+    this.scanner = new ClassicTokenizerImpl(input);
   }
 
   // this tokenizer generates three attributes:
@@ -170,9 +170,16 @@ public final class ClassicTokenizer extends Tokenizer {
     // adjust any skipped tokens
     posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
+
+  @Override
+  public void close() throws IOException {
+    super.close();
+    scanner.yyreset(input);
+  }
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     scanner.yyreset(input);
     skippedPositions = 0;
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
index e30fa2450fc..d23bc6385fa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
@@ -128,7 +128,7 @@ public final class StandardTokenizer extends Tokenizer {
   }
 
   private final void init(Version matchVersion) {
-    this.scanner = new StandardTokenizerImpl(null); // best effort NPE if you dont call reset
+    this.scanner = new StandardTokenizerImpl(input);
   }
 
   // this tokenizer generates three attributes:
@@ -179,8 +179,15 @@ public final class StandardTokenizer extends Tokenizer {
     posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
 
+  @Override
+  public void close() throws IOException {
+    super.close();
+    scanner.yyreset(input);
+  }
+
   @Override
   public void reset() throws IOException {
+    super.reset();
     scanner.yyreset(input);
     skippedPositions = 0;
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index 9e1b23cb569..061aefbaeab 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -111,8 +111,8 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     this.scanner = getScannerFor(matchVersion);
   }
 
-  private static StandardTokenizerInterface getScannerFor(Version matchVersion) {
-    return new UAX29URLEmailTokenizerImpl(null); // best effort NPE if you dont call reset
+  private StandardTokenizerInterface getScannerFor(Version matchVersion) {
+    return new UAX29URLEmailTokenizerImpl(input);
   }
 
   // this tokenizer generates three attributes:
@@ -157,9 +157,16 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     // adjust any skipped tokens
     posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
+
+  @Override
+  public void close() throws IOException {
+    super.close();
+    scanner.yyreset(input);
+  }
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     scanner.yyreset(input);
     skippedPositions = 0;
   }
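Note the idiom in the three scanner-based tokenizers above: the JFlex scanner is now constructed on 'input' directly, and close() re-points it at 'input' again after super.close(). This appears to serve two purposes, given the Tokenizer.java change listed in the diffstat: it drops the scanner's reference to the consumed Reader, and it leaves the scanner attached to a Reader that fails fast once the tokenizer is closed or not yet reset. The consumer-visible effect, in a minimal sketch (hypothetical class name, assuming a Lucene 4.5-era classpath; not a test from this patch):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public class ForgotResetSketch {
      public static void main(String[] args) throws Exception {
        Tokenizer t = new StandardTokenizer(Version.LUCENE_45, new StringReader("some text"));
        try {
          t.incrementToken(); // reset() was never called
          System.out.println("unexpected: no exception");
        } catch (IllegalStateException expected) {
          // pre-patch behavior was a NullPointerException from the null scanner Reader
          System.out.println("got IllegalStateException, as documented");
        } finally {
          t.close();
        }
      }
    }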
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index d19760f2a1c..71653d1a0a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -62,8 +62,7 @@ public abstract class CharTokenizer extends Tokenizer {
     charUtils = CharacterUtils.getInstance(matchVersion);
   }
 
-  // note: bufferIndex is -1 here to best-effort AIOOBE consumers that don't call reset()
-  private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
+  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
   private static final int MAX_WORD_LEN = 255;
   private static final int IO_BUFFER_SIZE = 4096;
 
@@ -150,6 +149,7 @@ public abstract class CharTokenizer extends Tokenizer {
 
   @Override
   public void reset() throws IOException {
+    super.reset();
     bufferIndex = 0;
     offset = 0;
     dataLen = 0;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
index 8d61852156c..5708420f5a5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
@@ -143,7 +143,7 @@ public final class WikipediaTokenizer extends Tokenizer {
    */
   public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(input);
-    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
+    this.scanner = new WikipediaTokenizerImpl(this.input);
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -156,7 +156,7 @@ public final class WikipediaTokenizer extends Tokenizer {
    */
   public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(factory, input);
-    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
+    this.scanner = new WikipediaTokenizerImpl(this.input);
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -295,6 +295,12 @@ public final class WikipediaTokenizer extends Tokenizer {
     offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
   }
 
+  @Override
+  public void close() throws IOException {
+    super.close();
+    scanner.yyreset(input);
+  }
+
   /*
    * (non-Javadoc)
    *
@@ -302,6 +308,7 @@
    */
   @Override
   public void reset() throws IOException {
+    super.reset();
     scanner.yyreset(input);
     tokens = null;
     scanner.reset();
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
index 6549efadceb..80b067e9052 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
@@ -60,8 +60,8 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
    */
   public void testReusableTokenStream() throws Exception {
     ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
-    assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
-    assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
+    assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
+    assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
   }
 
   /**
@@ -86,12 +86,12 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false);
     ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
-    assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
+    assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick",
"ساهدهات" }); a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); - assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); + assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java index 9932fcc3863..10661256bf3 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java @@ -102,7 +102,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ArabicNormalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java index 256b3c1aa2d..cd919e48c11 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java @@ -142,6 +142,6 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ArabicStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java index d390cdf7fe6..e1579dc6d01 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java @@ -49,8 +49,8 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws IOException { Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(a, "документи", new String[] {"документ"}); - assertAnalyzesToReuse(a, "документ", new String[] {"документ"}); + assertAnalyzesTo(a, "документи", new String[] {"документ"}); + assertAnalyzesTo(a, "документ", new String[] {"документ"}); } /** diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java index 85bcfd4d5ff..bd84728060f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java @@ -234,6 +234,6 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new BulgarianStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java 
index 174348d30bd..b870b585295 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java @@ -157,7 +157,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } private void checkReuse(Analyzer a, String input, String expected) throws Exception { - checkOneTermReuse(a, input, expected); + checkOneTerm(a, input, expected); } /** blast some random strings through the analyzer */ @@ -173,6 +173,6 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new BrazilianStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java index de7ac01bdd3..4d32666b9c1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java @@ -34,8 +34,8 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "llengües", "llengu"); - checkOneTermReuse(a, "llengua", "llengu"); + checkOneTerm(a, "llengües", "llengu"); + checkOneTerm(a, "llengua", "llengu"); // stopword assertAnalyzesTo(a, "un", new String[] { }); } @@ -52,8 +52,8 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false); Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT, CatalanAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "llengües", "llengües"); - checkOneTermReuse(a, "llengua", "llengu"); + checkOneTerm(a, "llengües", "llengües"); + checkOneTerm(a, "llengua", "llengu"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java index 549f819da67..953dfc0a6d6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java @@ -167,14 +167,14 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws IOException { - assertAnalyzesToReuse(analyzer, "あいうえおabcかきくけこ", + assertAnalyzesTo(analyzer, "あいうえおabcかきくけこ", new String[] { "あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ" }, new int[] { 0, 1, 2, 3, 5, 8, 9, 10, 11 }, new int[] { 2, 3, 4, 5, 8, 10, 11, 12, 13 }, new String[] { "", "", "", "", "", "", "", "", "" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1}); - assertAnalyzesToReuse(analyzer, "あいうえおabんcかきくけ こ", + assertAnalyzesTo(analyzer, "あいうえおabんcかきくけ こ", new String[] { "あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ" }, new int[] { 0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14 }, new int[] { 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15 }, @@ -288,6 +288,6 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java index 04939139536..45292b1df34 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKWidthFilter.java @@ -74,6 +74,6 @@ public class TestCJKWidthFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index 5d6b23b669a..4972a1537b4 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -377,7 +377,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm()); final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); @@ -390,6 +390,6 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, filter); } }; - checkOneTermReuse(b, "", ""); + checkOneTerm(b, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java index ecb0a0789cc..e668a9da770 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java @@ -39,8 +39,8 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); - assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" }); + assertAnalyzesTo(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); + assertAnalyzesTo(analyzer, "Česká Republika", new String[] { "česk", "republik" }); } public void testWithStemExclusionSet() throws IOException{ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java index 3bae6fb3026..c3b39872bd7 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java @@ -294,7 +294,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new CzechStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java index 7fb7c66f940..eddf531d0de 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java @@ -34,8 +34,8 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "undersøg", "undersøg"); - checkOneTermReuse(a, "undersøgelse", "undersøg"); + checkOneTerm(a, "undersøg", "undersøg"); + checkOneTerm(a, "undersøgelse", "undersøg"); // stopword assertAnalyzesTo(a, "på", new String[] {}); } @@ -45,8 +45,8 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false); Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT, DanishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "undersøgelse", "undersøgelse"); - checkOneTermReuse(a, "undersøg", "undersøg"); + checkOneTerm(a, "undersøgelse", "undersøgelse"); + checkOneTerm(a, "undersøg", "undersøg"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java index df3fa5e92cd..f453320eca7 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java @@ -29,9 +29,9 @@ import org.apache.lucene.analysis.util.CharArraySet; public class TestGermanAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT); - checkOneTermReuse(a, "Tisch", "tisch"); - checkOneTermReuse(a, "Tische", "tisch"); - checkOneTermReuse(a, "Tischen", "tisch"); + checkOneTerm(a, "Tisch", "tisch"); + checkOneTerm(a, "Tische", "tisch"); + checkOneTerm(a, "Tischen", "tisch"); } public void testWithKeywordAttribute() throws IOException { @@ -46,7 +46,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { public void testStemExclusionTable() throws Exception { GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false)); - checkOneTermReuse(a, "tischen", "tischen"); + checkOneTerm(a, "tischen", "tischen"); } /** test some features of the new snowball filter @@ -55,8 +55,8 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { public void testGermanSpecials() throws Exception { GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT); // a/o/u + e is equivalent to the umlaut form - checkOneTermReuse(a, "Schaltflächen", "schaltflach"); - checkOneTermReuse(a, "Schaltflaechen", "schaltflach"); + checkOneTerm(a, "Schaltflächen", "schaltflach"); + checkOneTerm(a, "Schaltflaechen", "schaltflach"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java index e9f3606c1cf..2ec406a1272 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java @@ -75,6 +75,6 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GermanLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java index 0c39f9437fd..afb99b30fa3 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java @@ -87,6 +87,6 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java index 8794076a0bc..3d267f9067d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanNormalizationFilter.java @@ -75,6 +75,6 @@ public class TestGermanNormalizationFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GermanNormalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java index 9001e06b1af..b64ab42fadb 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java @@ -88,6 +88,6 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java index e64e4a0db89..86dae1ee6d1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java @@ -51,16 +51,16 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase { Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT); // Verify the correct analysis of capitals and small accented letters, and // stemming - assertAnalyzesToReuse(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", + assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", new String[] { "μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ", "ελληνικ", "γλωσσ" }); // Verify the correct analysis of small letters with diaeresis and the elimination // of punctuation marks - assertAnalyzesToReuse(a, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ", + assertAnalyzesTo(a, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ", new String[] { 
"προιοντ", "πολλαπλ", "αναγκ" }); // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words - assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", + assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" }); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java index 84595fcfb82..79be1a34052 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java @@ -536,6 +536,6 @@ public class TestGreekStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GreekStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java index 41cbb4dc64e..82b2b036be1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java @@ -34,14 +34,14 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "books", "book"); - checkOneTermReuse(a, "book", "book"); + checkOneTerm(a, "books", "book"); + checkOneTerm(a, "book", "book"); // stopword assertAnalyzesTo(a, "the", new String[] {}); // possessive removal - checkOneTermReuse(a, "steven's", "steven"); - checkOneTermReuse(a, "steven\u2019s", "steven"); - checkOneTermReuse(a, "steven\uFF07s", "steven"); + checkOneTerm(a, "steven's", "steven"); + checkOneTerm(a, "steven\u2019s", "steven"); + checkOneTerm(a, "steven\uFF07s", "steven"); } /** test use of exclusion set */ @@ -49,8 +49,8 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false); Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT, EnglishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "books", "books"); - checkOneTermReuse(a, "book", "book"); + checkOneTerm(a, "books", "books"); + checkOneTerm(a, "book", "book"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java index 1cdba74dc5a..86da36469d6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java @@ -65,6 +65,6 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new EnglishMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java index 
3c4c5a92ac2..28e85d0e23b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestKStemmer.java @@ -62,7 +62,7 @@ public class TestKStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } /****** requires original java kstem source code to create map diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java index a9f1edd2829..786e83bbcf9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java @@ -74,6 +74,6 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PorterStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java index ae25af75398..2338906e810 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java @@ -34,8 +34,8 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "chicana", "chican"); - checkOneTermReuse(a, "chicano", "chican"); + checkOneTerm(a, "chicana", "chican"); + checkOneTerm(a, "chicano", "chican"); // stopword assertAnalyzesTo(a, "los", new String[] {}); } @@ -45,8 +45,8 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false); Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT, SpanishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "chicana", "chican"); - checkOneTermReuse(a, "chicano", "chicano"); + checkOneTerm(a, "chicana", "chican"); + checkOneTerm(a, "chicano", "chicano"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java index a5f0af06fc7..ea0455f2681 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java @@ -59,6 +59,6 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new SpanishLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java index 31720e43d7c..ca9aa67c151 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java @@ -34,8 +34,8 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "zaldi", "zaldi"); - checkOneTermReuse(a, "zaldiak", "zaldi"); + checkOneTerm(a, "zaldi", "zaldi"); + checkOneTerm(a, "zaldiak", "zaldi"); // stopword assertAnalyzesTo(a, "izan", new String[] { }); } @@ -45,8 +45,8 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false); Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT, BasqueAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "zaldiak", "zaldiak"); - checkOneTermReuse(a, "mendiari", "mendi"); + checkOneTerm(a, "zaldiak", "zaldiak"); + checkOneTerm(a, "mendiari", "mendi"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java index 1a3f02064cc..64510cf40b5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java @@ -208,8 +208,8 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { */ public void testReusableTokenStream() throws Exception { Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" }); - assertAnalyzesToReuse(a, "برگ‌ها", new String[] { "برگ" }); + assertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" }); + assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" }); } /** diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java index 97dca44ea8c..4c89f99be3a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java @@ -72,7 +72,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PersianNormalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java index d3fa72944bc..e3ef862e6a2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java @@ -34,8 +34,8 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); - checkOneTermReuse(a, "edeltäjistään", "edeltäj"); + checkOneTerm(a, "edeltäjiinsä", "edeltäj"); + checkOneTerm(a, "edeltäjistään", "edeltäj"); // stopword assertAnalyzesTo(a, "olla", new String[] {}); } @@ -45,8 +45,8 @@ public class TestFinnishAnalyzer extends 
BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false); Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT, FinnishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); - checkOneTermReuse(a, "edeltäjistään", "edeltäjistään"); + checkOneTerm(a, "edeltäjiinsä", "edeltäj"); + checkOneTerm(a, "edeltäjistään", "edeltäjistään"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java index 50ad1fc2a49..e5679276fc1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java @@ -75,6 +75,6 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new FinnishLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java index e1953bf2ff4..6680b7e57f2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java @@ -117,13 +117,13 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); // stopwords - assertAnalyzesToReuse( + assertAnalyzesTo( fa, "le la chien les aux chat du des à cheval", new String[] { "chien", "chat", "cheval" }); // some nouns and adjectives - assertAnalyzesToReuse( + assertAnalyzesTo( fa, "lances chismes habitable chiste éléments captifs", new String[] { @@ -140,7 +140,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { set.add("habitable"); FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); - assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable", + assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", "chist" }); fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); @@ -169,7 +169,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { /** test accent-insensitive */ public void testAccentInsensitive() throws Exception { Analyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT); - checkOneTermReuse(a, "sécuritaires", "securitair"); - checkOneTermReuse(a, "securitaires", "securitair"); + checkOneTerm(a, "sécuritaires", "securitair"); + checkOneTerm(a, "securitaires", "securitair"); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java index 8ca9b99f3ed..36e87549523 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java @@ -205,6 +205,6 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new 
FrenchLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java index b1ea6d01631..042c53e8dcb 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java @@ -89,6 +89,6 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new FrenchMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java index 9a62c36f77d..8db7c66e5b1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java @@ -34,8 +34,8 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "siopadóireacht", "siopadóir"); - checkOneTermReuse(a, "síceapatacha", "síceapaite"); + checkOneTerm(a, "siopadóireacht", "siopadóir"); + checkOneTerm(a, "síceapatacha", "síceapaite"); // stopword assertAnalyzesTo(a, "le", new String[] { }); } @@ -52,8 +52,8 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("feirmeoireacht"), false); Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT, IrishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "feirmeoireacht", "feirmeoireacht"); - checkOneTermReuse(a, "siopadóireacht", "siopadóir"); + checkOneTerm(a, "feirmeoireacht", "feirmeoireacht"); + checkOneTerm(a, "siopadóireacht", "siopadóir"); } /** test special hyphen handling */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java index 7e8fc76a0f4..2f2ed07b946 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java @@ -52,6 +52,6 @@ public class TestIrishLowerCaseFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new IrishLowerCaseFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java index f7ada397193..0ce5d21aaba 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java @@ -34,8 +34,8 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "correspondente", "correspond"); - 
checkOneTermReuse(a, "corresponderá", "correspond"); + checkOneTerm(a, "correspondente", "correspond"); + checkOneTerm(a, "corresponderá", "correspond"); // stopword assertAnalyzesTo(a, "e", new String[] {}); } @@ -45,8 +45,8 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false); Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT, GalicianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "correspondente", "correspondente"); - checkOneTermReuse(a, "corresponderá", "correspond"); + checkOneTerm(a, "correspondente", "correspondente"); + checkOneTerm(a, "corresponderá", "correspond"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java index aca739595d1..6d9b89119f6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java @@ -79,6 +79,6 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java index 1e4c219975e..72943b2a9f6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java @@ -58,6 +58,6 @@ public class TestGalicianStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new GalicianStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java index 54760361bd8..bfb4f77f543 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java @@ -34,15 +34,15 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws Exception { Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT); // two ways to write 'hindi' itself. 
- checkOneTermReuse(a, "हिन्दी", "हिंद"); - checkOneTermReuse(a, "हिंदी", "हिंद"); + checkOneTerm(a, "हिन्दी", "हिंद"); + checkOneTerm(a, "हिंदी", "हिंद"); } public void testExclusionSet() throws Exception { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false); Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT, HindiAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "हिंदी", "हिंदी"); + checkOneTerm(a, "हिंदी", "हिंदी"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java index de5eac352cc..81395e38d61 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java @@ -75,6 +75,6 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new HindiNormalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java index 8df076ff115..fcd7dd66b95 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java @@ -97,6 +97,6 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new HindiStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java index a1f137ddd0c..a395def06ac 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java @@ -34,8 +34,8 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "babakocsi", "babakocs"); - checkOneTermReuse(a, "babakocsijáért", "babakocs"); + checkOneTerm(a, "babakocsi", "babakocs"); + checkOneTerm(a, "babakocsijáért", "babakocs"); // stopword assertAnalyzesTo(a, "által", new String[] {}); } @@ -45,8 +45,8 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false); Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT, HungarianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "babakocsi", "babakocsi"); - checkOneTermReuse(a, "babakocsijáért", "babakocs"); + checkOneTerm(a, "babakocsi", "babakocsi"); + checkOneTerm(a, "babakocsijáért", "babakocs"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java index 21904d2e634..681b7be184b 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java @@ -70,6 +70,6 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new HungarianLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java index 4679b45490b..941eb1d7541 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java @@ -89,6 +89,6 @@ public class HunspellStemFilterTest extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, _TestUtil.nextInt(random(), 1, 3))); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java index ab6e99925e5..ef74e391b46 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java @@ -34,8 +34,8 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "արծիվ", "արծ"); - checkOneTermReuse(a, "արծիվներ", "արծ"); + checkOneTerm(a, "արծիվ", "արծ"); + checkOneTerm(a, "արծիվներ", "արծ"); // stopword assertAnalyzesTo(a, "է", new String[] { }); } @@ -45,8 +45,8 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false); Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT, ArmenianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "արծիվներ", "արծիվներ"); - checkOneTermReuse(a, "արծիվ", "արծ"); + checkOneTerm(a, "արծիվներ", "արծիվներ"); + checkOneTerm(a, "արծիվ", "արծ"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java index e1686bf3f6d..ce3cd6edd20 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java @@ -34,8 +34,8 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "peledakan", "ledak"); - checkOneTermReuse(a, "pembunuhan", "bunuh"); + checkOneTerm(a, "peledakan", "ledak"); + checkOneTerm(a, "pembunuhan", "bunuh"); // stopword assertAnalyzesTo(a, "bahwa", new String[] {}); } @@ -45,8 +45,8 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, 
asSet("peledakan"), false); Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT, IndonesianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "peledakan", "peledakan"); - checkOneTermReuse(a, "pembunuhan", "bunuh"); + checkOneTerm(a, "peledakan", "peledakan"); + checkOneTerm(a, "pembunuhan", "bunuh"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java index b4aa0f1f5ae..165c3c30966 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java @@ -41,73 +41,73 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase { /** Some examples from the paper */ public void testExamples() throws IOException { checkOneTerm(a, "bukukah", "buku"); - checkOneTermReuse(a, "adalah", "ada"); - checkOneTermReuse(a, "bukupun", "buku"); - checkOneTermReuse(a, "bukuku", "buku"); - checkOneTermReuse(a, "bukumu", "buku"); - checkOneTermReuse(a, "bukunya", "buku"); - checkOneTermReuse(a, "mengukur", "ukur"); - checkOneTermReuse(a, "menyapu", "sapu"); - checkOneTermReuse(a, "menduga", "duga"); - checkOneTermReuse(a, "menuduh", "uduh"); - checkOneTermReuse(a, "membaca", "baca"); - checkOneTermReuse(a, "merusak", "rusak"); - checkOneTermReuse(a, "pengukur", "ukur"); - checkOneTermReuse(a, "penyapu", "sapu"); - checkOneTermReuse(a, "penduga", "duga"); - checkOneTermReuse(a, "pembaca", "baca"); - checkOneTermReuse(a, "diukur", "ukur"); - checkOneTermReuse(a, "tersapu", "sapu"); - checkOneTermReuse(a, "kekasih", "kasih"); - checkOneTermReuse(a, "berlari", "lari"); - checkOneTermReuse(a, "belajar", "ajar"); - checkOneTermReuse(a, "bekerja", "kerja"); - checkOneTermReuse(a, "perjelas", "jelas"); - checkOneTermReuse(a, "pelajar", "ajar"); - checkOneTermReuse(a, "pekerja", "kerja"); - checkOneTermReuse(a, "tarikkan", "tarik"); - checkOneTermReuse(a, "ambilkan", "ambil"); - checkOneTermReuse(a, "mengambilkan", "ambil"); - checkOneTermReuse(a, "makanan", "makan"); - checkOneTermReuse(a, "janjian", "janji"); - checkOneTermReuse(a, "perjanjian", "janji"); - checkOneTermReuse(a, "tandai", "tanda"); - checkOneTermReuse(a, "dapati", "dapat"); - checkOneTermReuse(a, "mendapati", "dapat"); - checkOneTermReuse(a, "pantai", "panta"); + checkOneTerm(a, "adalah", "ada"); + checkOneTerm(a, "bukupun", "buku"); + checkOneTerm(a, "bukuku", "buku"); + checkOneTerm(a, "bukumu", "buku"); + checkOneTerm(a, "bukunya", "buku"); + checkOneTerm(a, "mengukur", "ukur"); + checkOneTerm(a, "menyapu", "sapu"); + checkOneTerm(a, "menduga", "duga"); + checkOneTerm(a, "menuduh", "uduh"); + checkOneTerm(a, "membaca", "baca"); + checkOneTerm(a, "merusak", "rusak"); + checkOneTerm(a, "pengukur", "ukur"); + checkOneTerm(a, "penyapu", "sapu"); + checkOneTerm(a, "penduga", "duga"); + checkOneTerm(a, "pembaca", "baca"); + checkOneTerm(a, "diukur", "ukur"); + checkOneTerm(a, "tersapu", "sapu"); + checkOneTerm(a, "kekasih", "kasih"); + checkOneTerm(a, "berlari", "lari"); + checkOneTerm(a, "belajar", "ajar"); + checkOneTerm(a, "bekerja", "kerja"); + checkOneTerm(a, "perjelas", "jelas"); + checkOneTerm(a, "pelajar", "ajar"); + checkOneTerm(a, "pekerja", "kerja"); + checkOneTerm(a, "tarikkan", "tarik"); + checkOneTerm(a, "ambilkan", "ambil"); + checkOneTerm(a, "mengambilkan", "ambil"); + checkOneTerm(a, 
"makanan", "makan"); + checkOneTerm(a, "janjian", "janji"); + checkOneTerm(a, "perjanjian", "janji"); + checkOneTerm(a, "tandai", "tanda"); + checkOneTerm(a, "dapati", "dapat"); + checkOneTerm(a, "mendapati", "dapat"); + checkOneTerm(a, "pantai", "panta"); } /** Some detailed analysis examples (that might not be the best) */ public void testIRExamples() throws IOException { checkOneTerm(a, "penyalahgunaan", "salahguna"); - checkOneTermReuse(a, "menyalahgunakan", "salahguna"); - checkOneTermReuse(a, "disalahgunakan", "salahguna"); + checkOneTerm(a, "menyalahgunakan", "salahguna"); + checkOneTerm(a, "disalahgunakan", "salahguna"); - checkOneTermReuse(a, "pertanggungjawaban", "tanggungjawab"); - checkOneTermReuse(a, "mempertanggungjawabkan", "tanggungjawab"); - checkOneTermReuse(a, "dipertanggungjawabkan", "tanggungjawab"); + checkOneTerm(a, "pertanggungjawaban", "tanggungjawab"); + checkOneTerm(a, "mempertanggungjawabkan", "tanggungjawab"); + checkOneTerm(a, "dipertanggungjawabkan", "tanggungjawab"); - checkOneTermReuse(a, "pelaksanaan", "laksana"); - checkOneTermReuse(a, "pelaksana", "laksana"); - checkOneTermReuse(a, "melaksanakan", "laksana"); - checkOneTermReuse(a, "dilaksanakan", "laksana"); + checkOneTerm(a, "pelaksanaan", "laksana"); + checkOneTerm(a, "pelaksana", "laksana"); + checkOneTerm(a, "melaksanakan", "laksana"); + checkOneTerm(a, "dilaksanakan", "laksana"); - checkOneTermReuse(a, "melibatkan", "libat"); - checkOneTermReuse(a, "terlibat", "libat"); + checkOneTerm(a, "melibatkan", "libat"); + checkOneTerm(a, "terlibat", "libat"); - checkOneTermReuse(a, "penculikan", "culik"); - checkOneTermReuse(a, "menculik", "culik"); - checkOneTermReuse(a, "diculik", "culik"); - checkOneTermReuse(a, "penculik", "culik"); + checkOneTerm(a, "penculikan", "culik"); + checkOneTerm(a, "menculik", "culik"); + checkOneTerm(a, "diculik", "culik"); + checkOneTerm(a, "penculik", "culik"); - checkOneTermReuse(a, "perubahan", "ubah"); - checkOneTermReuse(a, "peledakan", "ledak"); - checkOneTermReuse(a, "penanganan", "tangan"); - checkOneTermReuse(a, "kepolisian", "polisi"); - checkOneTermReuse(a, "kenaikan", "naik"); - checkOneTermReuse(a, "bersenjata", "senjata"); - checkOneTermReuse(a, "penyelewengan", "seleweng"); - checkOneTermReuse(a, "kecelakaan", "celaka"); + checkOneTerm(a, "perubahan", "ubah"); + checkOneTerm(a, "peledakan", "ledak"); + checkOneTerm(a, "penanganan", "tangan"); + checkOneTerm(a, "kepolisian", "polisi"); + checkOneTerm(a, "kenaikan", "naik"); + checkOneTerm(a, "bersenjata", "senjata"); + checkOneTerm(a, "penyelewengan", "seleweng"); + checkOneTerm(a, "kecelakaan", "celaka"); } /* inflectional-only stemming */ @@ -122,15 +122,15 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase { /** Test stemming only inflectional suffixes */ public void testInflectionalOnly() throws IOException { checkOneTerm(b, "bukunya", "buku"); - checkOneTermReuse(b, "bukukah", "buku"); - checkOneTermReuse(b, "bukunyakah", "buku"); - checkOneTermReuse(b, "dibukukannya", "dibukukan"); + checkOneTerm(b, "bukukah", "buku"); + checkOneTerm(b, "bukunyakah", "buku"); + checkOneTerm(b, "dibukukannya", "dibukukan"); } public void testShouldntStem() throws IOException { checkOneTerm(a, "bersenjata", "senjata"); - checkOneTermReuse(a, "bukukah", "buku"); - checkOneTermReuse(a, "gigi", "gigi"); + checkOneTerm(a, "bukukah", "buku"); + checkOneTerm(a, "gigi", "gigi"); } public void testEmptyTerm() throws IOException { @@ -141,6 +141,6 @@ public class TestIndonesianStemmer extends 
BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java index 79fb5942d19..4e54ecfc15d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java @@ -60,6 +60,6 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new IndicNormalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java index df5597cde6f..c93781cf7e1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java @@ -37,8 +37,8 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "abbandonata", "abbandonat"); - checkOneTermReuse(a, "abbandonati", "abbandonat"); + checkOneTerm(a, "abbandonata", "abbandonat"); + checkOneTerm(a, "abbandonati", "abbandonat"); // stopword assertAnalyzesTo(a, "dallo", new String[] {}); } @@ -48,8 +48,8 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false); Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT, ItalianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "abbandonata", "abbandonata"); - checkOneTermReuse(a, "abbandonati", "abbandonat"); + checkOneTerm(a, "abbandonata", "abbandonata"); + checkOneTerm(a, "abbandonati", "abbandonat"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java index fdc925e8d93..236d12926a1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java @@ -59,6 +59,6 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ItalianLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java index 7fd13faa660..33ac2e3f12b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java @@ -34,8 +34,8 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT); // stemming - 
checkOneTermReuse(a, "tirgiem", "tirg"); - checkOneTermReuse(a, "tirgus", "tirg"); + checkOneTerm(a, "tirgiem", "tirg"); + checkOneTerm(a, "tirgus", "tirg"); // stopword assertAnalyzesTo(a, "un", new String[] {}); } @@ -45,8 +45,8 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false); Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT, LatvianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "tirgiem", "tirgiem"); - checkOneTermReuse(a, "tirgus", "tirg"); + checkOneTerm(a, "tirgiem", "tirgiem"); + checkOneTerm(a, "tirgus", "tirg"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java index 1dfcf111a1c..045a26dd160 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java @@ -278,6 +278,6 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java index ed535f73a32..58ea61fd58d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java @@ -1934,6 +1934,6 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java index 39df7406ef2..780a83159e7 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java @@ -143,6 +143,6 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new CapitalizationFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java index 1a8bfc751c7..1cb1334fbc4 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java @@ -85,6 +85,6 @@ public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new HyphenatedWordsFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java index d54d394bbc4..8c1d1c5adc2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java @@ -48,7 +48,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java index 668d93b4177..4e54548b171 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java @@ -175,7 +175,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java index 9161418fbf8..7df95fa318f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java @@ -115,6 +115,6 @@ public class TestTrimFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java index 7aeec71a20f..41e88824f86 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java @@ -197,7 +197,6 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q")); tk = new ShingleFilter(tk); tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10); - tk.reset(); assertTokenStreamContents(tk, new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6,11,11,14 },
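
The EdgeNGramTokenFilterTest hunk above goes in the opposite direction from most of this patch: it deletes a reset() rather than adding one. assertTokenStreamContents() drives the whole consumer lifecycle itself (reset(), then incrementToken() to exhaustion, then end() and close()), so callers must hand it a fresh, un-reset stream; under the stricter state machine this commit introduces, an extra caller-side reset() would leave the stream unreadable. A hedged sketch of that failure mode (assuming the guard-reader enforcement added elsewhere in this patch; the method name is made up and ts stands for any Tokenizer-backed stream):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;

    // Illustrative only: why a redundant reset() is no longer harmless.
    static void doubleResetFails(TokenStream ts) throws IOException {
      ts.reset();           // first reset(): legal, arms the real reader
      ts.reset();           // second reset(): swaps the illegal-state guard back in
      ts.incrementToken();  // expected to throw IllegalStateException now
    }

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java index 5ce448f334d..1f554b44e0b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java @@ -115,24 +115,24 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { public void testSnowballCorrectness() throws Exception { Analyzer a = new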
DutchAnalyzer(TEST_VERSION_CURRENT); - checkOneTermReuse(a, "opheffen", "opheff"); - checkOneTermReuse(a, "opheffende", "opheff"); - checkOneTermReuse(a, "opheffing", "opheff"); + checkOneTerm(a, "opheffen", "opheff"); + checkOneTerm(a, "opheffende", "opheff"); + checkOneTerm(a, "opheffing", "opheff"); } public void testReusableTokenStream() throws Exception { Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT); - checkOneTermReuse(a, "lichaamsziek", "lichaamsziek"); - checkOneTermReuse(a, "lichamelijk", "licham"); - checkOneTermReuse(a, "lichamelijke", "licham"); - checkOneTermReuse(a, "lichamelijkheden", "licham"); + checkOneTerm(a, "lichaamsziek", "lichaamsziek"); + checkOneTerm(a, "lichamelijk", "licham"); + checkOneTerm(a, "lichamelijke", "licham"); + checkOneTerm(a, "lichamelijkheden", "licham"); } public void testExclusionTableViaCtor() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("lichamelijk"); DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); - assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); + assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java index c37acc06989..98fb8f66964 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java @@ -34,8 +34,8 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "havnedistriktene", "havnedistrikt"); - checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); + checkOneTerm(a, "havnedistriktene", "havnedistrikt"); + checkOneTerm(a, "havnedistrikter", "havnedistrikt"); // stopword assertAnalyzesTo(a, "det", new String[] {}); } @@ -45,8 +45,8 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false); Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT, NorwegianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "havnedistriktene", "havnedistriktene"); - checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); + checkOneTerm(a, "havnedistriktene", "havnedistriktene"); + checkOneTerm(a, "havnedistrikter", "havnedistrikt"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java index 55eefe1a2f8..f3a4b9a25b1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java @@ -93,6 +93,6 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new NorwegianLightStemFilter(tokenizer)); } 
}; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java index 38866f690a5..945e0a5e512 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java @@ -92,6 +92,6 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new NorwegianMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java index 148537e0bab..34791f384a5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java @@ -114,7 +114,7 @@ public class TestPatternReplaceFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PatternReplaceFilter(tokenizer, Pattern.compile("a"), "b", true)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java index f60744e6df4..eddadfabce1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java @@ -34,6 +34,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); + filter.reset(); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); @@ -45,6 +46,8 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8")); assertFalse(filter.incrementToken()); + filter.end(); + filter.close(); } public void testNext() throws Exception { @@ -53,6 +56,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); + filter.reset(); assertTermEquals("The", filter, null); assertTermEquals("quick", filter, "JJ".getBytes("UTF-8")); assertTermEquals("red", filter, "JJ".getBytes("UTF-8")); @@ -64,6 +68,8 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { assertTermEquals("brown", filter, "JJ".getBytes("UTF-8")); assertTermEquals("dogs", filter, "NN".getBytes("UTF-8")); 
assertFalse(filter.incrementToken()); + filter.end(); + filter.close(); } @@ -72,6 +78,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder()); CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); + filter.reset(); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f)); assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f)); @@ -83,6 +90,8 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f)); assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f)); assertFalse(filter.incrementToken()); + filter.end(); + filter.close(); } public void testIntEncoding() throws Exception { @@ -90,6 +99,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder()); CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); + filter.reset(); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1)); assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2)); @@ -101,12 +111,13 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99)); assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83)); assertFalse(filter.incrementToken()); + filter.end(); + filter.close(); } void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception { CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class); - stream.reset(); assertTrue(stream.incrementToken()); assertEquals(expected, termAtt.toString()); BytesRef payload = payloadAtt.getPayload(); @@ -123,7 +134,6 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception { - stream.reset(); assertTrue(stream.incrementToken()); assertEquals(expected, termAtt.toString()); BytesRef payload = payAtt.getPayload();
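
The DelimitedPayloadTokenFilterTest changes above spell out the consumer workflow this commit starts enforcing: the old assertTermEquals() helpers called stream.reset() before every single token, which the lenient pre-LUCENE-5235 streams tolerated; now reset() happens exactly once up front, and each test finishes with end() and close(). For reference, a minimal consumer following the documented TokenStream contract (the consume() name and the println are illustrative, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

    // Canonical order: get attributes, reset(), incrementToken() loop, end(), close().
    static void consume(TokenStream stream) throws IOException {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      PayloadAttribute payAtt = stream.addAttribute(PayloadAttribute.class);
      stream.reset();                      // exactly once, before the first incrementToken()
      while (stream.incrementToken()) {
        System.out.println(termAtt + " payload=" + payAtt.getPayload());
      }
      stream.end();                        // performs end-of-stream work, e.g. final offset
      stream.close();                      // releases the underlying resources
    }

diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java index 17921633c7a..402cf5c3579 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java @@ -34,8 +34,8 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "quilométricas", "quilometric"); - checkOneTermReuse(a,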
"quilométricos", "quilometric"); + checkOneTerm(a, "quilométricas", "quilometric"); + checkOneTerm(a, "quilométricos", "quilometric"); // stopword assertAnalyzesTo(a, "não", new String[] {}); } @@ -45,8 +45,8 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT, PortugueseAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "quilométricas", "quilométricas"); - checkOneTermReuse(a, "quilométricos", "quilometric"); + checkOneTerm(a, "quilométricas", "quilométricas"); + checkOneTerm(a, "quilométricos", "quilometric"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java index 5cb3416893a..80842795e24 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java @@ -123,6 +123,6 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java index e9abb946045..1c7d63cdbf7 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java @@ -97,6 +97,6 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PortugueseMinimalStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java index 9d083d1d74d..5b08ce32577 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java @@ -96,6 +96,6 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PortugueseStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java index 0f2db59d95d..5620f20f901 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java @@ -113,6 +113,6 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer)); } }; - checkOneTermReuse(a, "", ""); + 
checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java index 2cac9b5908c..03c96d096ff 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java @@ -34,8 +34,8 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "absenţa", "absenţ"); - checkOneTermReuse(a, "absenţi", "absenţ"); + checkOneTerm(a, "absenţa", "absenţ"); + checkOneTerm(a, "absenţi", "absenţ"); // stopword assertAnalyzesTo(a, "îl", new String[] {}); } @@ -45,8 +45,8 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false); Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT, RomanianAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "absenţa", "absenţa"); - checkOneTermReuse(a, "absenţi", "absenţ"); + checkOneTerm(a, "absenţa", "absenţa"); + checkOneTerm(a, "absenţi", "absenţ"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java index 5677e401320..fbc683675e1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java @@ -39,9 +39,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", + assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" }); - assertAnalyzesToReuse(a, "Но знание это хранилось в тайне", + assertAnalyzesTo(a, "Но знание это хранилось в тайне", new String[] { "знан", "эт", "хран", "тайн" }); } @@ -50,7 +50,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("представление"); Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set); - assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", + assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java index 5a6e4f60475..9622305fde9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java @@ -75,6 +75,6 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase { return new 
TokenStreamComponents(tokenizer, new RussianLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java index 6a578d2352f..855a14e3825 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java @@ -140,12 +140,12 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { Analyzer a = new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2); - assertAnalyzesToReuse(a, "please divide into shingles", + assertAnalyzesTo(a, "please divide into shingles", new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" }, new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 6, 13, 13, 18, 18, 27, 27 }, new int[] { 1, 0, 1, 0, 1, 0, 1 }); - assertAnalyzesToReuse(a, "divide me up again", + assertAnalyzesTo(a, "divide me up again", new String[] { "divide", "divide me", "me", "me up", "up", "up again", "again" }, new int[] { 0, 0, 7, 7, 10, 10, 13 }, new int[] { 6, 9, 9, 12, 12, 18, 18 }, @@ -155,7 +155,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { public void testNonDefaultMinShingleSize() throws Exception { ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4); - assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", + assertAnalyzesTo(analyzer, "please divide this sentence into shingles", new String[] { "please", "please divide this", "please divide this sentence", "divide", "divide this sentence", "divide this sentence into", "this", "this sentence into", "this sentence into shingles", @@ -168,7 +168,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { analyzer = new ShingleAnalyzerWrapper( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4, ShingleFilter.TOKEN_SEPARATOR, false, false); - assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", + assertAnalyzesTo(analyzer, "please divide this sentence into shingles", new String[] { "please divide this", "please divide this sentence", "divide this sentence", "divide this sentence into", "this sentence into", "this sentence into shingles", @@ -181,7 +181,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { public void testNonDefaultMinAndSameMaxShingleSize() throws Exception { ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3); - assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", + assertAnalyzesTo(analyzer, "please divide this sentence into shingles", new String[] { "please", "please divide this", "divide", "divide this sentence", "this", "this sentence into", @@ -194,7 +194,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { analyzer = new ShingleAnalyzerWrapper( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3, ShingleFilter.TOKEN_SEPARATOR, false, false); - assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", + 
assertAnalyzesTo(analyzer, "please divide this sentence into shingles", new String[] { "please divide this", "divide this sentence", "this sentence into", @@ -210,7 +210,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", true, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "please", "pleasedivide", "divide", "divideinto", "into", "intoshingles", @@ -224,7 +224,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", false, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "pleasedivide", "divideinto", "intoshingles" }, @@ -239,7 +239,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, null, true, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "please", "pleasedivide", "divide", "divideinto", "into", "intoshingles", @@ -253,7 +253,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", false, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "pleasedivide", "divideinto", "intoshingles" }, @@ -267,7 +267,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", true, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "please", "pleasedivide", "divide", "divideinto", "into", "intoshingles", @@ -281,7 +281,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", false, false); - assertAnalyzesToReuse(analyzer, "please divide into shingles", + assertAnalyzesTo(analyzer, "please divide into shingles", new String[] { "pleasedivide", "divideinto", "intoshingles" }, @@ -296,7 +296,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, "", false, true); - assertAnalyzesToReuse(analyzer, "please", + assertAnalyzesTo(analyzer, "please", new String[] { "please" }, new int[] { 0 }, new int[] { 6 }, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java index 9022cee5c72..4b8296783b9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java @@ -1134,7 +1134,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } public void testTrailingHole1() throws 
IOException { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java index 0650ebc6cae..a27e2601037 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java @@ -114,7 +114,7 @@ public class TestSnowball extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java index 7acc3502ea4..75525559a71 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java @@ -34,8 +34,8 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "jaktkarlarne", "jaktkarl"); - checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); + checkOneTerm(a, "jaktkarlarne", "jaktkarl"); + checkOneTerm(a, "jaktkarlens", "jaktkarl"); // stopword assertAnalyzesTo(a, "och", new String[] {}); } @@ -45,8 +45,8 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false); Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT, SwedishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne"); - checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); + checkOneTerm(a, "jaktkarlarne", "jaktkarlarne"); + checkOneTerm(a, "jaktkarlens", "jaktkarl"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java index 81914b2d25d..a00a18aeaa4 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java @@ -75,6 +75,6 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index 76a7d6480ff..141b525f0b1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -92,14 +92,14 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); - assertAnalyzesToReuse(analyzer, "", new String[] {}); + assertAnalyzesTo(analyzer, "", new String[] {}); - assertAnalyzesToReuse( + assertAnalyzesTo( analyzer, 
"การที่ได้ต้องแสดงว่างานดี", new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"}); - assertAnalyzesToReuse( + assertAnalyzesTo( analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" }); @@ -136,6 +136,6 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ThaiWordFilter(TEST_VERSION_CURRENT, tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java index 540f93feb42..c832ff689fa 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java @@ -34,8 +34,8 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "ağacı", "ağaç"); - checkOneTermReuse(a, "ağaç", "ağaç"); + checkOneTerm(a, "ağacı", "ağaç"); + checkOneTerm(a, "ağaç", "ağaç"); // stopword assertAnalyzesTo(a, "dolayı", new String[] {}); } @@ -45,8 +45,8 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false); Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT, TurkishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "ağacı", "ağacı"); - checkOneTermReuse(a, "ağaç", "ağaç"); + checkOneTerm(a, "ağacı", "ağacı"); + checkOneTerm(a, "ağaç", "ağaç"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java index ed8ad8aaa0a..48ed8bae1c2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java @@ -83,6 +83,6 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new TurkishLowerCaseFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java index 0d5cead735f..f2f56d173cd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java @@ -69,7 +69,7 @@ public class TestElision extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ElisionFilter(tokenizer, FrenchAnalyzer.DEFAULT_ARTICLES)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java index 176ee9ba8b2..24a6fdea108 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java +++ 
b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java @@ -45,8 +45,7 @@ public final class ICUTokenizer extends Tokenizer { /** true length of text in the buffer */ private int length = 0; /** length in buffer that can be evaluated safely, up to a safe end point */ - // note: usableLength is -1 here to best-effort AIOOBE consumers that don't call reset() - private int usableLength = -1; + private int usableLength = 0; /** accumulated offset of previous buffers for this reader, for offsetAtt */ private int offset = 0; diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java index 0bd315692ce..6990f94b93d 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java @@ -87,6 +87,6 @@ public class TestICUFoldingFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java index 09c412834b0..90559969672 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java @@ -87,6 +87,6 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java index b8ec5506e0c..5c6e5617914 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java @@ -109,6 +109,6 @@ public class TestICUTransformFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin"))); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index f11200853e7..f6649f3b872 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -207,7 +207,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - assertAnalyzesToReuse(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །", + assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །", new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" }); } diff --git 
a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java index 3e85bf80602..ca25597ce78 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java @@ -191,14 +191,14 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws IOException { - assertAnalyzesToReuse(analyzer, "あいうえおabcかきくけこ", + assertAnalyzesTo(analyzer, "あいうえおabcかきくけこ", new String[] { "あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ" }, new int[] { 0, 1, 2, 3, 5, 8, 9, 10, 11 }, new int[] { 2, 3, 4, 5, 8, 10, 11, 12, 13 }, new String[] { "", "", "", "", "", "", "", "", "" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1}); - assertAnalyzesToReuse(analyzer, "あいうえおabんcかきくけ こ", + assertAnalyzesTo(analyzer, "あいうえおabんcかきくけ こ", new String[] { "あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ" }, new int[] { 0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14 }, new int[] { 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15 }, diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java index 3568d56859d..6a0b735e937 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java @@ -243,7 +243,7 @@ public final class JapaneseTokenizer extends Tokenizer { outputCompounds = false; break; } - buffer.reset(null); // best effort NPE consumers that don't call reset() + buffer.reset(this.input); resetState(); @@ -260,8 +260,15 @@ public final class JapaneseTokenizer extends Tokenizer { this.dotOut = dotOut; } + @Override + public void close() throws IOException { + super.close(); + buffer.reset(input); + } + @Override public void reset() throws IOException { + super.reset(); buffer.reset(input); resetState(); } }
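
The JapaneseTokenizer hunk above shows the subclass side of the new contract: reset() must chain to super.reset() so the base Tokenizer can swap the real reader in for the guard that throws IllegalStateException, and only then may the subclass re-wire its own state (here, pointing the character buffer at input and resetting the Viterbi state); the new close() override likewise re-points the buffer at the now-guarded input. A minimal custom Tokenizer honoring the same contract might look like this (SingleChunkTokenizer is a made-up example, not part of the patch; it uses the Reader-based Tokenizer constructor of this era):

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Emits the first chunk of the reader as a single token; illustrative only.
    public final class SingleChunkTokenizer extends Tokenizer {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private boolean done;

      public SingleChunkTokenizer(Reader input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (done) return false;
        clearAttributes();
        char[] buf = new char[255];
        int len = input.read(buf);    // reading before reset() now hits the guard reader
        done = true;
        if (len <= 0) return false;
        termAtt.copyBuffer(buf, 0, len);
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();                // required first: installs the real reader
        done = false;                 // then re-initialize subclass state
      }
    }

diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java index 4735cb494e2..42e2abf67ac 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java @@ -75,6 +75,6 @@ public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java index 5561621ec75..161bc2b82cc 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java @@ -94,6 +94,6 @@ public class TestJapaneseKatakanaStemFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer)); } }; -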
checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java index f413eb146e3..a6b6ff2d4ab 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java @@ -103,6 +103,6 @@ public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer)); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java index 93bbe3c48a6..366355b7d6e 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java @@ -44,16 +44,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { /** Test stemming of single tokens with Morfologik library. */ public final void testSingleTokens() throws IOException { Analyzer a = getTestAnalyzer(); - assertAnalyzesToReuse(a, "a", new String[] { "a" }); - assertAnalyzesToReuse(a, "liście", new String[] { "liście", "liść", "list", "lista" }); - assertAnalyzesToReuse(a, "danych", new String[] { "dany", "dana", "dane", "dać" }); - assertAnalyzesToReuse(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" }); + assertAnalyzesTo(a, "a", new String[] { "a" }); + assertAnalyzesTo(a, "liście", new String[] { "liście", "liść", "list", "lista" }); + assertAnalyzesTo(a, "danych", new String[] { "dany", "dana", "dane", "dać" }); + assertAnalyzesTo(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" }); } /** Test stemming of multiple tokens and proper term metrics. */ public final void testMultipleTokens() throws IOException { Analyzer a = getTestAnalyzer(); - assertAnalyzesToReuse( + assertAnalyzesTo( a, "liście danych", new String[] { "liście", "liść", "list", "lista", "dany", "dana", "dane", "dać" }, @@ -61,7 +61,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { new int[] { 6, 6, 6, 6, 13, 13, 13, 13 }, new int[] { 1, 0, 0, 0, 1, 0, 0, 0 }); - assertAnalyzesToReuse( + assertAnalyzesTo( a, "T. 
Gl\u00FCcksberg", new String[] { "tom", "tona", "Gl\u00FCcksberg" }, @@ -106,16 +106,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { public final void testCase() throws IOException { Analyzer a = getTestAnalyzer(); - assertAnalyzesToReuse(a, "AGD", new String[] { "AGD", "artykuły gospodarstwa domowego" }); - assertAnalyzesToReuse(a, "agd", new String[] { "artykuły gospodarstwa domowego" }); + assertAnalyzesTo(a, "AGD", new String[] { "AGD", "artykuły gospodarstwa domowego" }); + assertAnalyzesTo(a, "agd", new String[] { "artykuły gospodarstwa domowego" }); - assertAnalyzesToReuse(a, "Poznania", new String[] { "Poznań" }); - assertAnalyzesToReuse(a, "poznania", new String[] { "poznanie", "poznać" }); + assertAnalyzesTo(a, "Poznania", new String[] { "Poznań" }); + assertAnalyzesTo(a, "poznania", new String[] { "poznanie", "poznać" }); - assertAnalyzesToReuse(a, "Aarona", new String[] { "Aaron" }); - assertAnalyzesToReuse(a, "aarona", new String[] { "aarona" }); + assertAnalyzesTo(a, "Aarona", new String[] { "Aaron" }); + assertAnalyzesTo(a, "aarona", new String[] { "aarona" }); - assertAnalyzesToReuse(a, "Liście", new String[] { "liście", "liść", "list", "lista" }); + assertAnalyzesTo(a, "Liście", new String[] { "liście", "liść", "list", "lista" }); } private void assertPOSToken(TokenStream ts, String term, String... tags) throws IOException { @@ -183,7 +183,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { } }; - assertAnalyzesToReuse( + assertAnalyzesTo( a, "liście danych", new String[] { "liście", "dany", "dana", "dane", "dać" }, diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java index 4298aa6bace..98fc78b49c9 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java @@ -105,6 +105,6 @@ public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean())); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java index b4c77a98fe3..5bc05021922 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java @@ -106,7 +106,7 @@ public class TestBeiderMorseFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true))); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } public void testCustomAttribute() throws IOException { diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java index a35d88eda18..68b688e4c09 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java +++ 
b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java @@ -51,27 +51,6 @@ public class TestDoubleMetaphoneFilterFactory extends BaseTokenStreamTestCase { assertTokenStreamContents(filteredStream, new String[] { "ANTRNXNL" }); } - /** - * Ensure that reset() removes any state (buffered tokens) - */ - public void testReset() throws Exception { - DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String,String>()); - TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false); - - TokenStream filteredStream = factory.create(inputStream); - CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class); - assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass()); - - filteredStream.reset(); - assertTrue(filteredStream.incrementToken()); - assertEquals(13, termAtt.length()); - assertEquals("international", termAtt.toString()); - filteredStream.reset(); - - // ensure there are no more tokens, such as ANTRNXNL - assertFalse(filteredStream.incrementToken()); - } - /** Test that bogus arguments result in exception */ public void testBogusArguments() throws Exception { try { diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java index aaaaad63528..252fc8f98f4 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java @@ -113,7 +113,7 @@ public class TestPhoneticFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean())); } }; - checkOneTermReuse(a, "", ""); + checkOneTerm(a, "", ""); } } } diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java index 1ff8b88f096..3af41659527 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java @@ -108,6 +108,7 @@ public final class SentenceTokenizer extends Tokenizer { @Override public void reset() throws IOException { + super.reset(); tokenStart = tokenEnd = 0; } diff --git a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java index 899bfbfdf85..1cb8e9b6034 100644 --- a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java @@ -79,7 +79,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { String result[] = { "我", "购买", "了", "道具", "和", "服装", "," }; for (Analyzer analyzer : analyzers) { assertAnalyzesTo(analyzer, sentence, result); - assertAnalyzesToReuse(analyzer, sentence, result); + assertAnalyzesTo(analyzer, sentence, result); } } @@ -167,11 +167,11 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { Analyzer a = new 
SmartChineseAnalyzer(Version.LUCENE_CURRENT); - assertAnalyzesToReuse(a, "我购买 Tests 了道具和服装", + assertAnalyzesTo(a, "我购买 Tests 了道具和服装", new String[] { "我", "购买", "test", "了", "道具", "和", "服装"}, new int[] { 0, 1, 4, 10, 11, 13, 14 }, new int[] { 1, 3, 9, 11, 13, 14, 16 }); - assertAnalyzesToReuse(a, "我购买了道具和服装。", + assertAnalyzesTo(a, "我购买了道具和服装。", new String[] { "我", "购买", "了", "道具", "和", "服装" }, new int[] { 0, 1, 3, 4, 6, 7 }, new int[] { 1, 3, 4, 6, 7, 9 }); diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java index 52c4f924882..60d894a5461 100644 --- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java +++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java @@ -34,8 +34,8 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase { public void testBasics() throws IOException { Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT); // stemming - checkOneTermReuse(a, "studenta", "student"); - checkOneTermReuse(a, "studenci", "student"); + checkOneTerm(a, "studenta", "student"); + checkOneTerm(a, "studenci", "student"); // stopword assertAnalyzesTo(a, "był", new String[] {}); } @@ -45,8 +45,8 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase { CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);; Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT, PolishAnalyzer.getDefaultStopSet(), exclusionSet); - checkOneTermReuse(a, "studenta", "studenta"); - checkOneTermReuse(a, "studenci", "student"); + checkOneTerm(a, "studenta", "studenta"); + checkOneTerm(a, "studenci", "student"); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java index ead3bf9b576..8a326390caf 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java @@ -89,6 +89,7 @@ public abstract class BaseUIMATokenizer extends Tokenizer { @Override public void reset() throws IOException { + super.reset(); iterator = null; } } diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java b/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java index 9013bdb2148..2f9a20f4252 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java @@ -30,21 +30,28 @@ import java.io.IOException; call {@link AttributeSource#clearAttributes()} before setting attributes. */ -public abstract class Tokenizer extends TokenStream { +public abstract class Tokenizer extends TokenStream { /** The text source for this Tokenizer. */ - protected Reader input; + protected Reader input = ILLEGAL_STATE_READER; + + /** Pending reader: not actually assigned to input until reset() */ + private Reader inputPending = ILLEGAL_STATE_READER; /** Construct a token stream processing the given input. 
*/ protected Tokenizer(Reader input) { - assert input != null: "input must not be null"; - this.input = input; + if (input == null) { + throw new NullPointerException("input must not be null"); + } + this.inputPending = input; } /** Construct a token stream processing the given input using the given AttributeFactory. */ protected Tokenizer(AttributeFactory factory, Reader input) { super(factory); - assert input != null: "input must not be null"; - this.input = input; + if (input == null) { + throw new NullPointerException("input must not be null"); + } + this.inputPending = input; } /** @@ -56,12 +63,10 @@ public abstract class Tokenizer extends TokenStream { */ @Override public void close() throws IOException { - if (input != null) { - input.close(); - // LUCENE-2387: don't hold onto Reader after close, so - // GC can reclaim - input = null; - } + input.close(); + // LUCENE-2387: don't hold onto Reader after close, so + // GC can reclaim + inputPending = input = ILLEGAL_STATE_READER; } /** Return the corrected offset. If {@link #input} is a {@link CharFilter} subclass @@ -71,7 +76,6 @@ public abstract class Tokenizer extends TokenStream { * @see CharFilter#correctOffset */ protected final int correctOffset(int currentOff) { - assert input != null: "this tokenizer is closed"; return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff; } @@ -79,14 +83,36 @@ public abstract class Tokenizer extends TokenStream { * analyzer (in its tokenStream method) will use * this to re-use a previously created tokenizer. */ public final void setReader(Reader input) throws IOException { - assert input != null: "input must not be null"; - this.input = input; + if (input == null) { + throw new NullPointerException("input must not be null"); + } + this.input = ILLEGAL_STATE_READER; + this.inputPending = input; assert setReaderTestPoint(); } + @Override + public void reset() throws IOException { + super.reset(); + input = inputPending; + inputPending = ILLEGAL_STATE_READER; + } + // only used by assert, for testing boolean setReaderTestPoint() { return true; } + + private static final Reader ILLEGAL_STATE_READER = new Reader() { + @Override + public int read(char[] cbuf, int off, int len) { + throw new IllegalStateException("TokenStream contract violation: reset()/close() call missing, " + + "reset() called multiple times, or subclass does not call super.reset(). 
" + + "Please see Javadocs of TokenStream class for more information about the correct consuming workflow."); + } + + @Override + public void close() {} + }; } diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java b/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java index 7bcbe75b46e..f29725c29db 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java @@ -68,7 +68,8 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase { } @Override - public void reset() { + public void reset() throws IOException { + super.reset(); tokens = null; upto = 0; } diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java index c7ab08162f5..f04f8496116 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -36,9 +36,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { Analyzer a = new MockAnalyzer(random()); assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ", new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); - assertAnalyzesToReuse(a, "aba cadaba shazam", + assertAnalyzesTo(a, "aba cadaba shazam", new String[] { "aba", "cadaba", "shazam" }); - assertAnalyzesToReuse(a, "break on whitespace", + assertAnalyzesTo(a, "break on whitespace", new String[] { "break", "on", "whitespace" }); } @@ -47,9 +47,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); - assertAnalyzesToReuse(a, "aba4cadaba-Shazam", + assertAnalyzesTo(a, "aba4cadaba-Shazam", new String[] { "aba", "cadaba", "shazam" }); - assertAnalyzesToReuse(a, "break+on/Letters", + assertAnalyzesTo(a, "break+on/Letters", new String[] { "break", "on", "letters" }); } @@ -58,9 +58,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { Analyzer a = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " }); - assertAnalyzesToReuse(a, "aba4cadaba-Shazam", + assertAnalyzesTo(a, "aba4cadaba-Shazam", new String[] { "aba4cadaba-Shazam" }); - assertAnalyzesToReuse(a, "break+on/Nothing", + assertAnalyzesTo(a, "break+on/Nothing", new String[] { "break+on/Nothing" }); } @@ -106,7 +106,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { stream.end(); stream.close(); - assertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); + assertAnalyzesTo(analyzer, testString, new String[] { "t" }); } /** blast some random strings through the analyzer */ diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 86d639aca00..5e35a785a3a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1599,14 +1599,15 @@ public class TestIndexWriter extends LuceneTestCase { @Override public void reset() throws IOException { - this.upto = 0; - final StringBuilder b = new StringBuilder(); - final char[] buffer = new 
char[1024]; - int n; - while ((n = input.read(buffer)) != -1) { - b.append(buffer, 0, n); - } - this.tokens = b.toString().split(" "); + super.reset(); + this.upto = 0; + final StringBuilder b = new StringBuilder(); + final char[] buffer = new char[1024]; + int n; + while ((n = input.read(buffer)) != -1) { + b.append(buffer, 0, n); + } + this.tokens = b.toString().split(" "); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java index 2c0a12c8d89..c632e053a85 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java @@ -227,7 +227,8 @@ public class TestTermRangeQuery extends LuceneTestCase { } @Override - public void reset() throws IOException {; + public void reset() throws IOException { + super.reset(); done = false; } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java index 7e0070f9024..f58c55e5117 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java @@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase { assertTokenStreamContents(filter, new String[] {"short", "toolong", "evenmuchlongertext"}); - checkOneTermReuse(new Analyzer() { + checkOneTerm(new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName, Reader reader) { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java index ea00fb3341d..c50265776ad 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java @@ -319,7 +319,8 @@ public abstract class AbstractTestCase extends LuceneTestCase { } @Override - public void reset() { + public void reset() throws IOException { + super.reset(); startTerm = 0; nextStartOffset = 0; snippet = null; diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java index bd22e75ccc8..4e033479008 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java @@ -82,6 +82,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase { @Override public void reset() throws IOException { + super.reset(); this.upto = 0; this.lastPos = 0; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index bcf51528477..10ebc302529 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -341,14 +341,17 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { } public 
static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { + checkResetException(a, input); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException { + checkResetException(a, input); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException { + checkResetException(a, input); assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect); } @@ -375,30 +378,28 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException { assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null); } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { - assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); - } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException { - assertAnalyzesToReuse(a, input, output, null, null, null, null); - } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException { - assertAnalyzesToReuse(a, input, output, null, null, types, null); - } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException { - assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements); - } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException { - assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null); - } - - public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException { - assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements); + static void checkResetException(Analyzer a, String input) throws IOException { + TokenStream ts = a.tokenStream("bogus", input); + try { + if (ts.incrementToken()) { + //System.out.println(ts.reflectAsString(false)); + fail("didn't get expected exception when reset() not called"); + } + } catch (IllegalStateException expected) { + // ok + } catch (AssertionError expected) { + // ok: MockTokenizer + assertTrue(expected.getMessage(), expected.getMessage() != null && expected.getMessage().contains("wrong state")); + } catch (Exception unexpected) { + fail("got wrong exception when reset() not called: " + unexpected); + } finally { + // consume correctly + ts.reset(); + while 
(ts.incrementToken()) {} + ts.end(); + ts.close(); + } } // simple utility method for testing stemmers @@ -407,10 +408,6 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertAnalyzesTo(a, input, new String[]{expected}); } - public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException { - assertAnalyzesToReuse(a, input, new String[]{expected}); - } - /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */ public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException { checkRandomData(random, a, iterations, 20, false, true); @@ -476,6 +473,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { } public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException { + checkResetException(a, "best effort"); long seed = random.nextLong(); boolean useCharFilter = random.nextBoolean(); Directory dir = null; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java index cbc96943a57..659831856c3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java @@ -41,7 +41,7 @@ public class VocabularyAssert { while ((inputWord = vocReader.readLine()) != null) { String expectedWord = outputReader.readLine(); Assert.assertNotNull(expectedWord); - BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord); + BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord); } } @@ -55,7 +55,7 @@ public class VocabularyAssert { if (inputLine.startsWith("#") || inputLine.trim().length() == 0) continue; /* comment */ String words[] = inputLine.split("\t"); - BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]); + BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]); } } diff --git a/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java b/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java index e5fe43c6119..719d7ad7b98 100644 --- a/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java +++ b/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java @@ -96,8 +96,9 @@ final class TrieTokenizer extends Tokenizer { } @Override - public void reset() { - try { + public void reset() throws IOException { + super.reset(); + try { int upto = 0; char[] buf = termAtt.buffer(); while (true) { @@ -167,6 +168,7 @@ final class TrieTokenizer extends Tokenizer { @Override public void end() throws IOException { + super.end(); if (hasValue) { ts.end(); } diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 5ff7dc223c8..c10b587c97a 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -691,6 +691,11 @@ final class TokenOrderingFilter extends TokenFilter { return true; } } + + @Override + public void reset() throws IOException { + // this looks wrong, but it's correct. 
+ } } // for TokenOrderingFilter, so it can easily sort by startOffset diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index d7be93b91fd..bb9a259d41d 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -74,6 +74,7 @@ public class BoolField extends PrimitiveFieldType { @Override public void reset() throws IOException { + super.reset(); done = false; } diff --git a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java index 631288eb1be..88c1cec2135 100644 --- a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java +++ b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java @@ -252,9 +252,11 @@ public class PreAnalyzedField extends FieldType { private byte[] binaryValue = null; private PreAnalyzedParser parser; private Reader lastReader; + private Reader input; // hides original input since we replay saved states (and don't reuse) public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) { super(reader); + this.input = reader; this.parser = parser; }
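
Note on the enforced consuming workflow: the ILLEGAL_STATE_READER introduced in Tokenizer.java above makes any read from an un-reset (or already closed) Tokenizer fail fast with IllegalStateException. The correct sequence is the one checkResetException drives: reset(), then incrementToken() until it returns false, then end() and close(). A minimal consumer sketch against the patched API; the class, field, and method names below are illustrative, not part of this patch:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConsumeTokenStreamSketch {
  // Consumes a TokenStream following the TokenStream contract:
  // reset() first, incrementToken() until false, then end() and close().
  static void consumeAll(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream ts = analyzer.tokenStream(field, text);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();                  // mandatory: Tokenizer.reset() activates the pending Reader
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();                    // records the final offset state
    } finally {
      ts.close();                  // reverts the input to ILLEGAL_STATE_READER until the next setReader()/reset()
    }
  }
}

Calling incrementToken() without the preceding reset() now surfaces the contract violation immediately at the first read, which is exactly what checkResetException asserts for every analyzer under test.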
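
The scattered one-line super.reset() additions throughout this patch all serve the same mechanism: Tokenizer.reset() is now the only place where the pending reader is swapped into input, so a subclass that overrides reset() must delegate to super before touching its own state. A sketch of a conforming subclass, modeled on the whitespace-splitting test tokenizer patched in TestIndexWriter above; the class name is hypothetical:

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class SplitOnSpaceTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private String[] tokens;
  private int upto;

  public SplitOnSpaceTokenizer(Reader input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (tokens == null) {
      // If reset() was skipped, this read hits ILLEGAL_STATE_READER and
      // throws IllegalStateException instead of failing somewhere deeper.
      final StringBuilder sb = new StringBuilder();
      final char[] buffer = new char[1024];
      int n;
      while ((n = input.read(buffer)) != -1) {
        sb.append(buffer, 0, n);
      }
      tokens = sb.toString().split(" ");
    }
    if (upto == tokens.length) {
      return false;
    }
    clearAttributes();
    termAtt.append(tokens[upto++]);
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset(); // required: swaps the pending reader into 'input'
    tokens = null;
    upto = 0;
  }
}

Omitting the super.reset() call here would leave input pointing at ILLEGAL_STATE_READER, which is precisely the misuse the new checkResetException hook catches in every test that goes through BaseTokenStreamTestCase.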