From 9938a39a872d4f232f718b2672d0245cae658e0b Mon Sep 17 00:00:00 2001
From: Ryan Ernst
Date: Fri, 8 Aug 2014 22:42:48 +0000
Subject: [PATCH] LUCENE-5859: Remove Version from Analyzer constructors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1616901 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt | 5 +
 .../lucene/analysis/ar/ArabicAnalyzer.java | 26 +--
 .../lucene/analysis/bg/BulgarianAnalyzer.java | 27 ++-
 .../lucene/analysis/br/BrazilianAnalyzer.java | 33 ++-
 .../lucene/analysis/ca/CatalanAnalyzer.java | 28 +--
 .../charfilter/HTMLStripCharFilter.java | 5 +-
 .../charfilter/HTMLStripCharFilter.jflex | 3 +-
 .../lucene/analysis/cjk/CJKAnalyzer.java | 19 +-
 .../lucene/analysis/ckb/SoraniAnalyzer.java | 28 +--
 .../commongrams/CommonGramsFilter.java | 3 +-
 .../commongrams/CommonGramsFilterFactory.java | 2 +-
 .../compound/CompoundWordTokenFilterBase.java | 57 ++---
 .../DictionaryCompoundWordTokenFilter.java | 47 ++--
 ...tionaryCompoundWordTokenFilterFactory.java | 18 +-
 .../HyphenationCompoundWordTokenFilter.java | 88 +++----
 ...enationCompoundWordTokenFilterFactory.java | 26 ++-
 .../Lucene43CompoundWordTokenFilterBase.java | 162 +++++++++++++
 ...ne43DictionaryCompoundWordTokenFilter.java | 100 ++++++++
 ...e43HyphenationCompoundWordTokenFilter.java | 217 ++++++++++++++++++
 .../lucene/analysis/core/KeywordAnalyzer.java | 2 -
 .../analysis/core/KeywordTokenizer.java | 2 -
 .../core/KeywordTokenizerFactory.java | 1 -
 .../lucene/analysis/core/LetterTokenizer.java | 23 +-
 .../analysis/core/LetterTokenizerFactory.java | 3 +-
 .../lucene/analysis/core/LowerCaseFilter.java | 13 +-
 .../analysis/core/LowerCaseFilterFactory.java | 3 +-
 .../analysis/core/LowerCaseTokenizer.java | 28 +--
 .../core/LowerCaseTokenizerFactory.java | 3 +-
 .../lucene/analysis/core/SimpleAnalyzer.java | 21 +-
 .../lucene/analysis/core/StopAnalyzer.java | 48 ++--
 .../lucene/analysis/core/StopFilter.java | 43 ++--
 .../analysis/core/StopFilterFactory.java | 5 +-
 .../lucene/analysis/core/TypeTokenFilter.java | 11 +-
 .../analysis/core/TypeTokenFilterFactory.java | 2 +-
 .../lucene/analysis/core/UpperCaseFilter.java | 10 +-
 .../analysis/core/UpperCaseFilterFactory.java | 3 +-
 .../analysis/core/WhitespaceAnalyzer.java | 21 +-
 .../analysis/core/WhitespaceTokenizer.java | 29 +--
 .../core/WhitespaceTokenizerFactory.java | 4 +-
 .../lucene/analysis/cz/CzechAnalyzer.java | 31 ++-
 .../lucene/analysis/da/DanishAnalyzer.java | 28 +--
 .../lucene/analysis/de/GermanAnalyzer.java | 31 ++-
 .../lucene/analysis/el/GreekAnalyzer.java | 21 +-
 .../analysis/el/GreekLowerCaseFilter.java | 14 +-
 .../el/GreekLowerCaseFilterFactory.java | 3 +-
 .../lucene/analysis/el/GreekStemmer.java | 33 ++-
 .../lucene/analysis/en/EnglishAnalyzer.java | 28 +--
 .../analysis/en/EnglishPossessiveFilter.java | 4 +-
 .../en/EnglishPossessiveFilterFactory.java | 3 +-
 .../apache/lucene/analysis/en/KStemmer.java | 3 +-
 .../lucene/analysis/es/SpanishAnalyzer.java | 28 +--
 .../lucene/analysis/eu/BasqueAnalyzer.java | 26 +--
 .../lucene/analysis/fa/PersianAnalyzer.java | 17 +-
 .../lucene/analysis/fi/FinnishAnalyzer.java | 28 +--
 .../lucene/analysis/fr/FrenchAnalyzer.java | 33 ++-
 .../lucene/analysis/ga/IrishAnalyzer.java | 30 ++-
 .../lucene/analysis/gl/GalicianAnalyzer.java | 28 +--
 .../lucene/analysis/hi/HindiAnalyzer.java | 24 +-
 .../lucene/analysis/hu/HungarianAnalyzer.java | 28 +--
 .../lucene/analysis/hunspell/Stemmer.java | 3 +-
 .../lucene/analysis/hy/ArmenianAnalyzer.java | 26 +--
 .../analysis/id/IndonesianAnalyzer.java | 28 +--
 .../lucene/analysis/it/ItalianAnalyzer.java | 30 ++-
 .../lucene/analysis/lv/LatvianAnalyzer.java | 28 +--
 .../CapitalizationFilterFactory.java | 2 +-
 .../miscellaneous/CodepointCountFilter.java | 6 +-
 .../CodepointCountFilterFactory.java | 2 +-
 .../miscellaneous/KeepWordFilter.java | 6 +-
 .../miscellaneous/KeepWordFilterFactory.java | 3 +-
 .../analysis/miscellaneous/LengthFilter.java | 6 +-
 .../miscellaneous/LengthFilterFactory.java | 2 +-
 .../RemoveDuplicatesTokenFilter.java | 4 +-
 .../analysis/miscellaneous/TrimFilter.java | 7 +-
 .../miscellaneous/TrimFilterFactory.java | 2 +-
 .../miscellaneous/WordDelimiterFilter.java | 10 +-
 .../WordDelimiterFilterFactory.java | 2 +-
 .../ngram/EdgeNGramFilterFactory.java | 10 +-
 .../analysis/ngram/EdgeNGramTokenFilter.java | 12 +-
 .../analysis/ngram/EdgeNGramTokenizer.java | 12 +-
 .../ngram/EdgeNGramTokenizerFactory.java | 9 +-
 .../ngram/Lucene43EdgeNGramTokenFilter.java | 126 ++++++++++
 .../ngram/Lucene43EdgeNGramTokenizer.java | 53 +++++
 .../ngram/Lucene43NGramTokenFilter.java | 150 ++++++++++++
 .../ngram/Lucene43NGramTokenizer.java | 2 +-
 .../analysis/ngram/NGramFilterFactory.java | 10 +-
 .../analysis/ngram/NGramTokenFilter.java | 100 +++-----
 .../lucene/analysis/ngram/NGramTokenizer.java | 34 +--
 .../analysis/ngram/NGramTokenizerFactory.java | 2 +-
 .../lucene/analysis/nl/DutchAnalyzer.java | 38 ++-
 .../lucene/analysis/no/NorwegianAnalyzer.java | 28 +--
 .../analysis/pt/PortugueseAnalyzer.java | 28 +--
 .../lucene/analysis/pt/RSLPStemmerBase.java | 4 +-
 .../query/QueryAutoStopWordAnalyzer.java | 25 +-
 .../analysis/reverse/ReverseStringFilter.java | 32 +--
 .../reverse/ReverseStringFilterFactory.java | 3 +-
 .../lucene/analysis/ro/RomanianAnalyzer.java | 26 +--
 .../lucene/analysis/ru/RussianAnalyzer.java | 29 +--
 .../shingle/ShingleAnalyzerWrapper.java | 9 +-
 .../analysis/standard/ClassicAnalyzer.java | 42 +---
 .../analysis/standard/ClassicTokenizer.java | 13 +-
 .../standard/ClassicTokenizerFactory.java | 3 +-
 .../analysis/standard/StandardAnalyzer.java | 54 ++---
 .../analysis/standard/StandardFilter.java | 3 +-
 .../standard/StandardFilterFactory.java | 3 +-
 .../analysis/standard/StandardTokenizer.java | 13 +-
 .../standard/StandardTokenizerFactory.java | 3 +-
 .../standard/UAX29URLEmailAnalyzer.java | 37 +--
 .../standard/UAX29URLEmailTokenizer.java | 15 +-
 .../UAX29URLEmailTokenizerFactory.java | 3 +-
 .../lucene/analysis/sv/SwedishAnalyzer.java | 28 +--
 .../synonym/SynonymFilterFactory.java | 6 +-
 .../lucene/analysis/th/ThaiAnalyzer.java | 30 ++-
 .../lucene/analysis/th/ThaiWordFilter.java | 3 +-
 .../analysis/th/ThaiWordFilterFactory.java | 3 +-
 .../lucene/analysis/tr/TurkishAnalyzer.java | 33 ++-
 .../util/AbstractAnalysisFactory.java | 9 +-
 .../lucene/analysis/util/CharArrayMap.java | 53 +----
 .../lucene/analysis/util/CharArraySet.java | 46 +---
 .../lucene/analysis/util/CharTokenizer.java | 16 +-
 .../lucene/analysis/util/CharacterUtils.java | 27 +--
 .../analysis/util/FilteringTokenFilter.java | 6 +-
 .../analysis/util/StopwordAnalyzerBase.java | 34 +--
 .../lucene/analysis/util/WordlistLoader.java | 16 +-
 .../collation/CollationKeyAnalyzer.java | 6 +-
 .../analysis/ar/TestArabicAnalyzer.java | 20 +-
 .../analysis/ar/TestArabicStemFilter.java | 2 +-
 .../analysis/bg/TestBulgarianAnalyzer.java | 17 +-
 .../analysis/bg/TestBulgarianStemmer.java | 14 +-
 .../analysis/br/TestBrazilianStemmer.java | 14 +-
 .../analysis/ca/TestCatalanAnalyzer.java | 13 +-
 .../lucene/analysis/cjk/TestCJKAnalyzer.java | 10 +-
 .../analysis/cjk/TestCJKBigramFilter.java | 10 +-
 .../analysis/ckb/TestSoraniAnalyzer.java | 14 +-
 .../analysis/ckb/TestSoraniStemFilter.java | 2 +-
 .../commongrams/CommonGramsFilterTest.java | 31 ++-
 .../compound/TestCompoundWordTokenFilter.java | 35 ++-
 .../lucene/analysis/core/TestAnalyzers.java | 39 ++--
 .../analysis/core/TestBugInSomething.java | 12 +-
 .../analysis/core/TestClassicAnalyzer.java | 23 +-
 .../analysis/core/TestDuelingAnalyzers.java | 12 +-
 .../analysis/core/TestKeywordAnalyzer.java | 5 +-
 .../analysis/core/TestRandomChains.java | 10 +-
 .../analysis/core/TestStandardAnalyzer.java | 12 +-
 .../analysis/core/TestStopAnalyzer.java | 11 +-
 .../lucene/analysis/core/TestStopFilter.java | 25 +-
 .../analysis/core/TestTypeTokenFilter.java | 12 +-
 .../core/TestUAX29URLEmailAnalyzer.java | 5 +-
 .../core/TestUAX29URLEmailTokenizer.java | 10 +-
 .../lucene/analysis/cz/TestCzechAnalyzer.java | 11 +-
 .../lucene/analysis/cz/TestCzechStemmer.java | 16 +-
 .../analysis/da/TestDanishAnalyzer.java | 10 +-
 .../analysis/de/TestGermanAnalyzer.java | 14 +-
 .../de/TestGermanLightStemFilter.java | 2 +-
 .../de/TestGermanMinimalStemFilter.java | 2 +-
 .../analysis/de/TestGermanStemFilter.java | 4 +-
 .../lucene/analysis/el/GreekAnalyzerTest.java | 6 +-
 .../lucene/analysis/el/TestGreekStemmer.java | 2 +-
 .../analysis/en/TestEnglishAnalyzer.java | 10 +-
 .../analysis/en/TestPorterStemFilter.java | 2 +-
 .../analysis/es/TestSpanishAnalyzer.java | 10 +-
 .../analysis/eu/TestBasqueAnalyzer.java | 10 +-
 .../analysis/fa/TestPersianAnalyzer.java | 18 +-
 .../analysis/fi/TestFinnishAnalyzer.java | 10 +-
 .../fi/TestFinnishLightStemFilter.java | 2 +-
 .../analysis/fr/TestFrenchAnalyzer.java | 19 +-
 .../fr/TestFrenchLightStemFilter.java | 2 +-
 .../fr/TestFrenchMinimalStemFilter.java | 2 +-
 .../lucene/analysis/ga/TestIrishAnalyzer.java | 14 +-
 .../analysis/gl/TestGalicianAnalyzer.java | 10 +-
 .../gl/TestGalicianMinimalStemFilter.java | 2 +-
 .../lucene/analysis/hi/TestHindiAnalyzer.java | 10 +-
 .../analysis/hu/TestHungarianAnalyzer.java | 10 +-
 .../hu/TestHungarianLightStemFilter.java | 2 +-
 .../hunspell/TestHunspellStemFilter.java | 2 +-
 .../analysis/hy/TestArmenianAnalyzer.java | 10 +-
 .../analysis/id/TestIndonesianAnalyzer.java | 10 +-
 .../analysis/it/TestItalianAnalyzer.java | 15 +-
 .../analysis/lv/TestLatvianAnalyzer.java | 10 +-
 .../TestCapitalizationFilter.java | 2 +-
 .../TestCodepointCountFilter.java | 8 +-
 .../miscellaneous/TestKeepWordFilter.java | 6 +-
 .../TestKeywordMarkerFilter.java | 10 +-
 .../miscellaneous/TestLengthFilter.java | 6 +-
 .../TestLucene47WordDelimiterFilter.java | 9 +-
 .../TestPerFieldAnalyzerWrapper.java | 8 +-
 .../TestStemmerOverrideFilter.java | 2 +-
 .../miscellaneous/TestTrimFilter.java | 8 +-
 .../TestWordDelimiterFilter.java | 43 ++--
 .../ngram/EdgeNGramTokenFilterTest.java | 32 +--
 .../ngram/EdgeNGramTokenizerTest.java | 18 +-
 .../analysis/ngram/NGramTokenFilterTest.java | 30 +--
 .../analysis/ngram/NGramTokenizerTest.java | 18 +-
 .../lucene/analysis/nl/TestDutchStemmer.java | 21 +-
 .../analysis/no/TestNorwegianAnalyzer.java | 10 +-
 .../no/TestNorwegianLightStemFilter.java | 2 +-
 .../no/TestNorwegianMinimalStemFilter.java | 2 +-
 .../analysis/pt/TestPortugueseAnalyzer.java | 10 +-
 .../pt/TestPortugueseLightStemFilter.java | 2 +-
 .../pt/TestPortugueseMinimalStemFilter.java | 2 +-
 .../analysis/pt/TestPortugueseStemFilter.java | 2 +-
 .../query/QueryAutoStopWordAnalyzerTest.java | 19 +-
 .../reverse/TestReverseStringFilter.java | 38 ++-
 .../analysis/ro/TestRomanianAnalyzer.java | 10 +-
 .../analysis/ru/TestRussianAnalyzer.java | 11 +-
 .../ru/TestRussianLightStemFilter.java | 2 +-
 .../shingle/ShingleAnalyzerWrapperTest.java | 4 +-
 .../analysis/shingle/ShingleFilterTest.java | 2 +-
 .../sinks/TestTeeSinkTokenFilter.java | 14 +-
 .../analysis/sv/TestSwedishAnalyzer.java | 10 +-
 .../sv/TestSwedishLightStemFilter.java | 2 +-
 .../synonym/TestSolrSynonymParser.java | 4 +-
 .../lucene/analysis/th/TestThaiAnalyzer.java | 16 +-
 .../analysis/tr/TestTurkishAnalyzer.java | 11 +-
 .../analysis/util/TestCharArrayMap.java | 8 +-
 .../analysis/util/TestCharArraySet.java | 51 ++--
 .../analysis/util/TestCharTokenizers.java | 12 +-
 .../analysis/util/TestCharacterUtils.java | 14 +-
 .../lucene/analysis/util/TestElision.java | 4 +-
 .../util/TestFilesystemResourceLoader.java | 3 +-
 .../analysis/util/TestWordlistLoader.java | 8 +-
 .../collation/TestCollationKeyAnalyzer.java | 4 +-
 .../icu/TestICUNormalizer2CharFilter.java | 2 +-
 .../segmentation/TestWithCJKBigramFilter.java | 5 +-
 .../lucene/analysis/ja/JapaneseAnalyzer.java | 16 +-
 .../ja/JapanesePartOfSpeechStopFilter.java | 6 +-
 ...JapanesePartOfSpeechStopFilterFactory.java | 2 +-
 .../analysis/ja/TestJapaneseAnalyzer.java | 24 +-
 .../ja/TestJapaneseBaseFormFilter.java | 3 +-
 .../ja/TestJapaneseKatakanaStemFilter.java | 3 +-
 .../morfologik/MorfologikAnalyzer.java | 16 +-
 .../analysis/morfologik/MorfologikFilter.java | 10 +-
 .../morfologik/MorfologikFilterFactory.java | 2 +-
 .../morfologik/TestMorfologikAnalyzer.java | 15 +-
 .../analysis/phonetic/TestPhoneticFilter.java | 4 +-
 .../cn/smart/SmartChineseAnalyzer.java | 21 +-
 .../cn/smart/TestSmartChineseAnalyzer.java | 42 ++--
 .../lucene/analysis/pl/PolishAnalyzer.java | 28 +--
 .../analysis/pl/TestPolishAnalyzer.java | 11 +-
 .../byTask/feeds/EnwikiQueryMaker.java | 3 +-
 .../byTask/feeds/FileBasedQueryMaker.java | 3 +-
 .../byTask/feeds/LongToEnglishQueryMaker.java | 3 +-
 .../byTask/feeds/ReutersQueryMaker.java | 3 +-
 .../byTask/feeds/SimpleQueryMaker.java | 3 +-
 .../tasks/NewCollationAnalyzerTask.java | 5 +-
 .../quality/utils/SimpleQQParser.java | 2 +-
 .../benchmark/byTask/TestPerfTasksLogic.java | 11 +-
 .../SimpleNaiveBayesClassifierTest.java | 2 +-
 .../org/apache/lucene/analysis/Analyzer.java | 16 ++
 .../org/apache/lucene/demo/IndexFiles.java | 2 +-
 .../org/apache/lucene/demo/SearchFiles.java | 7 +-
 .../demo/facet/AssociationsFacetsExample.java | 2 +-
 .../demo/facet/DistanceFacetsExample.java | 2 +-
 .../ExpressionAggregationFacetsExample.java | 2 +-
 .../MultiCategoryListsFacetsExample.java | 2 +-
 .../lucene/demo/facet/RangeFacetsExample.java | 2 +-
 .../demo/facet/SimpleFacetsExample.java | 2 +-
 .../facet/SimpleSortedSetFacetsExample.java | 2 +-
 .../demo/xmlparser/FormBasedXmlQueryDemo.java | 2 +-
 .../lucene/index/memory/MemoryIndexTest.java | 2 +-
 .../analyzing/AnalyzingQueryParser.java | 5 +-
 .../classic/MultiFieldQueryParser.java | 26 +--
 .../queryparser/classic/QueryParser.java | 14 +-
 .../lucene/queryparser/classic/QueryParser.jj | 14 +-
 .../queryparser/classic/QueryParserBase.java | 4 +-
 .../ComplexPhraseQueryParser.java | 5 +-
 .../ext/ExtendableQueryParser.java | 14 +-
 .../xml/builders/UserInputQueryBuilder.java | 2 +-
 .../analyzing/TestAnalyzingQueryParser.java | 6 +-
 .../classic/TestMultiAnalyzer.java | 6 +-
 .../classic/TestMultiFieldQueryParser.java | 48 ++--
 .../classic/TestMultiPhraseQueryParsing.java | 2 +-
 .../queryparser/classic/TestQueryParser.java | 29 ++-
 .../complexPhrase/TestComplexPhraseQuery.java | 6 +-
 .../ext/TestExtendableQueryParser.java | 4 +-
 .../analyzing/AnalyzingInfixSuggester.java | 14 +-
 .../AnalyzingInfixSuggesterTest.java | 4 +-
 .../analyzing/BlendedInfixSuggesterTest.java | 8 +-
 .../analyzing/TestFreeTextSuggester.java | 8 +-
 .../analyzing/TestSuggestStopFilter.java | 12 +-
 .../component/SpellCheckComponent.java | 2 +-
 .../analysis/ManagedStopFilterFactory.java | 4 +-
 .../apache/solr/schema/CollationField.java | 2 +-
 .../solr/schema/FieldTypePluginLoader.java | 34 ++-
 .../search/ComplexPhraseQParserPlugin.java | 2 +-
 .../solr/spelling/SolrSpellChecker.java | 2 +-
 .../conf/schema-luceneMatchVersion.xml | 12 +-
 .../solr/analysis/TestLuceneMatchVersion.java | 21 +-
 .../solr/core/TestArbitraryIndexDir.java | 2 +-
 .../solr/highlight/HighlighterTest.java | 4 +-
 .../test/org/apache/solr/search/TestSort.java | 2 +-
 .../spelling/IndexBasedSpellCheckerTest.java | 2 +-
 .../solr/spelling/SimpleQueryConverter.java | 2 +-
 .../spelling/SpellingQueryConverterTest.java | 10 +-
 .../TestSuggestSpellingConverter.java | 4 +-
 294 files changed, 2407 insertions(+), 2344 deletions(-)
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43HyphenationCompoundWordTokenFilter.java
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
 create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index aab9c832c58..0d5cf8dff71 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -80,6 +80,11 @@ API Changes
   takes the same selectors. Add helper methods to DocValues.java that are better
   suited for search code (never return null, etc). (Mike McCandless, Robert Muir)
 
+* LUCENE-5859: Remove Version from Analyzer constructors. Use Analyzer.setVersion()
+  to set the version an analyzer should use to replicate behavior from a specific
+  release.
+  (Ryan Ernst, Robert Muir)
+
 Documentation
 
 * LUCENE-5392: Add/improve analysis package documentation to reflect
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
index 39e5a087f2c..4dea7abbc49 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.Version;
 
 /**
  * {@link Analyzer} for Arabic.
@@ -89,20 +88,18 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public ArabicAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public ArabicAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words
    *
-   * @param matchVersion
-   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    */
-  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
-    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  public ArabicAnalyzer(CharArraySet stopwords){
+    this(stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -110,17 +107,14 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
    * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * {@link ArabicStemFilter}.
    *
-   * @param matchVersion
-   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    * @param stemExclusionSet
    *          a set of terms not to be stemmed
    */
-  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
-    super(matchVersion, stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
-        matchVersion, stemExclusionSet));
+  public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -136,10 +130,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer(matchVersion);
-    TokenStream result = new LowerCaseFilter(matchVersion, source);
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new LowerCaseFilter(source);
     // the order here is important: the stopword list is not normalized!
-    result = new StopFilter( matchVersion, result, stopwords);
+    result = new StopFilter(result, stopwords);
     // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
     result = new ArabicNormalizationFilter(result);
     if(!stemExclusionSet.isEmpty()) {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
index ffb9aed1b59..76e6ca05fc2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.bg;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -31,7 +30,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
 
 /**
  * {@link Analyzer} for Bulgarian.
@@ -42,6 +40,7 @@ import org.apache.lucene.util.Version;
  * <p>
  */
 public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
+
   /**
    * File containing default Bulgarian stopwords.
    *
@@ -84,15 +83,15 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
    * Builds an analyzer with the default stop words:
    * {@link #DEFAULT_STOPWORD_FILE}.
    */
-  public BulgarianAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public BulgarianAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words.
    */
-  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
-    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  public BulgarianAnalyzer(CharArraySet stopwords) {
+    this(stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -100,10 +99,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
    * If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter}
    * before {@link BulgarianStemFilter}.
    */
-  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(matchVersion, stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
-        matchVersion, stemExclusionSet)); }
+  public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  }
 
   /**
    * Creates a
@@ -119,10 +118,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   */
   @Override
   public TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer(matchVersion);
-    TokenStream result = new StandardFilter(matchVersion, source);
-    result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopwords);
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new StandardFilter(source);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new BulgarianStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index cddd3920c24..3c4decb0461 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
 
 /**
  * {@link Analyzer} for Brazilian Portuguese language.
@@ -44,7 +43,7 @@ import org.apache.lucene.util.Version;
  * not be stemmed, but indexed).
  * </p>
  *
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
@@ -65,7 +64,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   static {
     try {
       DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class,
-          DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+          DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
     } catch (IOException ex) {
       // default set should always be present as it is part of the
       // distribution (JAR)
@@ -83,35 +82,29 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
   */
-  public BrazilianAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public BrazilianAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words
    *
-   * @param matchVersion
-   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    */
-  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
-    super(matchVersion, stopwords);
+  public BrazilianAnalyzer(CharArraySet stopwords) {
+    super(stopwords);
   }
 
   /**
    * Builds an analyzer with the given stop words and stemming exclusion words
    *
-   * @param matchVersion
-   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    */
-  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
-      CharArraySet stemExclusionSet) {
-    this(matchVersion, stopwords);
-    excltable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(matchVersion, stemExclusionSet));
+  public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    this(stopwords);
+    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -126,10 +119,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    Tokenizer source = new StandardTokenizer(matchVersion);
-    TokenStream result = new LowerCaseFilter(matchVersion, source);
-    result = new StandardFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopwords);
+    Tokenizer source = new StandardTokenizer();
+    TokenStream result = new LowerCaseFilter(source);
+    result = new StandardFilter(result);
+    result = new StopFilter(result, stopwords);
     if(excltable != null && !excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);
     return new TokenStreamComponents(source, new BrazilianStemFilter(result));
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index 342348204a8..61ca46bb8a1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.CatalanStemmer;
 
 /**
@@ -46,7 +45,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
 
   private static final CharArraySet DEFAULT_ARTICLES = 
    CharArraySet.unmodifiableSet(
-      new CharArraySet(Version.LUCENE_CURRENT, 
+      new CharArraySet(
          Arrays.asList(
            "d", "l", "m", "n", "s", "t"
          ), true));
@@ -81,18 +80,17 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public CatalanAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public CatalanAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words.
    *
-   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    */
-  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
-    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  public CatalanAnalyzer(CharArraySet stopwords) {
+    this(stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -100,14 +98,12 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
    * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    *
-   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    * @param stemExclusionSet a set of terms not to be stemmed
    */
-  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(matchVersion, stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
-        matchVersion, stemExclusionSet));
+  public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -124,11 +120,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer(matchVersion);
-    TokenStream result = new StandardFilter(matchVersion, source);
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new StandardFilter(source);
     result = new ElisionFilter(result, DEFAULT_ARTICLES);
-    result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopwords);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new SnowballFilter(result, new CatalanStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index b10e1797863..ad304545195 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -26,7 +26,6 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -29841,7 +29840,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     upperCaseVariantsAccepted.put("amp", "AMP");
   }
   private static final CharArrayMap<String> entityValues
-      = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false);
+      = new CharArrayMap<>(253, false);
   static {
     String[] entities = {
       "AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2",
@@ -29980,7 +29979,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       escapeSTYLE = true;
     } else {
       if (null == this.escapedTags) {
-        this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+        this.escapedTags = new CharArraySet(16, true);
       }
       this.escapedTags.add(tag);
     }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index 4ec0785f6f3..8c34577d8a6 100755
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -24,7 +24,6 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -195,7 +194,7 @@ InlineElment = ( [aAbBiIqQsSuU] |
       escapeSTYLE = true;
     } else {
       if (null == this.escapedTags) {
-        this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+        this.escapedTags = new CharArraySet(16, true);
       }
       this.escapedTags.add(tag);
     }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index 958974c0dda..dda8e939d17 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cjk;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -28,7 +27,6 @@ import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
 
 /**
  * An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@@ -37,6 +35,7 @@ import org.apache.lucene.util.Version;
  * and filters stopwords with {@link StopFilter}
 */
 public final class CJKAnalyzer extends StopwordAnalyzerBase {
+
   /**
    * File containing default CJK stopwords.
    * <p/>
@@ -70,29 +69,27 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
   */
-  public CJKAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public CJKAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words
    *
-   * @param matchVersion
-   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    */
-  public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
-    super(matchVersion, stopwords);
+  public CJKAnalyzer(CharArraySet stopwords){
+    super(stopwords);
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer(matchVersion);
+    final Tokenizer source = new StandardTokenizer();
     // run the widthfilter first before bigramming, it sometimes combines characters.
     TokenStream result = new CJKWidthFilter(source);
-    result = new LowerCaseFilter(matchVersion, result);
+    result = new LowerCaseFilter(result);
     result = new CJKBigramFilter(result);
-    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
 }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
index 8a89ae5a978..edee99c5636 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
 
 /**
  * {@link Analyzer} for Sorani Kurdish.
@@ -62,7 +61,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   static {
     try {
       DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class,
-          DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+          DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
     } catch (IOException ex) {
       // default set should always be present as it is part of the
       // distribution (JAR)
@@ -74,18 +73,17 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public SoraniAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  public SoraniAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words.
    *
-   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    */
-  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) {
-    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  public SoraniAnalyzer(CharArraySet stopwords) {
+    this(stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -93,14 +91,12 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
    * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    *
-   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    * @param stemExclusionSet a set of terms not to be stemmed
    */
-  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(matchVersion, stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
-        matchVersion, stemExclusionSet));
+  public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -118,11 +114,11 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer(matchVersion);
-    TokenStream result = new StandardFilter(matchVersion, source);
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new StandardFilter(source);
     result = new SoraniNormalizationFilter(result);
-    result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopwords);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new SoraniStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
index 052878028e3..d04e1b8e525 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 
 /*
  * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
@@ -78,7 +77,7 @@ public final class CommonGramsFilter extends TokenFilter {
    * @param input TokenStream input in filter chain
    * @param commonWords The set of common words.
   */
-  public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
+  public CommonGramsFilter(TokenStream input, CharArraySet commonWords) {
     super(input);
     this.commonWords = commonWords;
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
index 637568e8008..82765f45f69 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
@@ -76,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
 
   @Override
   public TokenFilter create(TokenStream input) {
-    CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+    CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
     return commonGrams;
   }
 }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 60866579ad7..b6718afdc9b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -17,31 +17,18 @@ package org.apache.lucene.analysis.compound;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.LinkedList;
-
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+import java.util.LinkedList;
 
 /**
  * Base class for decomposition token filters.
- * <p>
- *
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- * <ul>
- * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * </ul>
 */
 public abstract class CompoundWordTokenFilterBase extends TokenFilter {
   /**
@@ -59,31 +46,29 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
   */
   public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
 
-  protected final Version matchVersion;
   protected final CharArraySet dictionary;
   protected final LinkedList<CompoundToken> tokens;
   protected final int minWordSize;
   protected final int minSubwordSize;
   protected final int maxSubwordSize;
   protected final boolean onlyLongestMatch;
-  
+
   protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-  
-  private AttributeSource.State current;
 
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
-    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+  private State current;
+
+  protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
+    this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
   }
 
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) {
-    this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+  protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary) {
+    this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
   }
 
-  protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+  protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
     super(input);
-    this.matchVersion = matchVersion;
     this.tokens=new LinkedList<>();
     if (minWordSize < 0) {
       throw new IllegalArgumentException("minWordSize cannot be negative");
@@ -100,7 +85,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
     this.onlyLongestMatch=onlyLongestMatch;
     this.dictionary = dictionary;
   }
-  
+
   @Override
   public final boolean incrementToken() throws IOException {
     if (!tokens.isEmpty()) {
@@ -141,7 +126,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
     tokens.clear();
     current = null;
   }
-  
+
   /**
    * Helper class to hold decompounded token information
   */
@@ -154,20 +139,8 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
       this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
 
       // offsets of the original word
-      int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
-      int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
-      
-      if (matchVersion.onOrAfter(Version.LUCENE_4_4) ||
-          endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
-        // if length by start + end offsets doesn't match the term text then assume
-        // this is a synonym and don't adjust the offsets.
-        this.startOffset = startOff;
-        this.endOffset = endOff;
-      } else {
-        final int newStart = startOff + offset;
-        this.startOffset = newStart;
-        this.endOffset = newStart + length;
-      }
+      this.startOffset = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
+      this.endOffset = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
     }
 
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index e7d697c10f0..34e19b7d305 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -18,60 +18,39 @@ package org.apache.lucene.analysis.compound;
  */
 
-import java.util.Set;
-
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 
 /**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
  * <p>
  * "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- * "Donaudampfschiff" even when you only enter "schiff".
+ * "Donaudampfschiff" even when you only enter "schiff". 
  * It uses a brute-force algorithm to achieve this.
  * <p>
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- * <ul>
- * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * </ul>
 */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
-  
+
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}
-   * 
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+   *
    * @param input
-   *          the {@link TokenStream} to process
+   *          the {@link org.apache.lucene.analysis.TokenStream} to process
    * @param dictionary
    *          the word dictionary to match against.
    */
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) {
-    super(matchVersion, input, dictionary);
+  public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary) {
+    super(input, dictionary);
     if (dictionary == null) {
       throw new IllegalArgumentException("dictionary cannot be null");
     }
   }
-  
+
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}
-   * 
-   * @param matchVersion
-   *          Lucene version to enable correct Unicode 4.0 behavior in the
-   *          dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+   *
    * @param input
-   *          the {@link TokenStream} to process
+   *          the {@link org.apache.lucene.analysis.TokenStream} to process
    * @param dictionary
    *          the word dictionary to match against.
    * @param minWordSize
    *          only words longer than this get processed
    * @param minSubwordSize
    *          only subwords longer than this get to the output stream
    * @param maxSubwordSize
    *          only subwords shorter than this get to the output stream
    * @param onlyLongestMatch
    *          Add only the longest matching subword to the stream
    */
-  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary,
-      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
-    super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+  public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary,
+      int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+    super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
     if (dictionary == null) {
       throw new IllegalArgumentException("dictionary cannot be null");
     }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
index 09770b8f3cf..8c88c08cedd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
@@ -22,12 +22,13 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
 
 import java.util.Map;
 import java.io.IOException;
 
 /**
- * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ * Factory for {@link Lucene43DictionaryCompoundWordTokenFilter}.
  * <pre class="prettyprint">
  * &lt;fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
@@ -50,9 +51,9 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
     super(args);
     assureMatchVersion();
     dictFile = require(args, "dictionary");
-    minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
-    minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
-    maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+    minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+    minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+    maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
     onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -67,8 +68,13 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
   @Override
   public TokenStream create(TokenStream input) {
     // if the dictionary is null, it means it was empty
-    return dictionary == null ? input : new DictionaryCompoundWordTokenFilter
-        (luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+    if (dictionary == null) {
+      return input;
+    }
+    if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+      return new DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+    }
+    return new Lucene43DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
   }
 }
 
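The create() hunk above is the back-compat pattern this commit applies across the analysis factories: the Version argument moves off the filter constructors, and each factory consults luceneMatchVersion once, at create() time, to choose between the current implementation and a frozen Lucene43 copy. A minimal standalone sketch of the same dispatch follows; it uses only classes this patch touches or creates, but the CompoundFilterChoice helper itself is hypothetical and not part of the patch.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.Lucene43CompoundWordTokenFilterBase;
import org.apache.lucene.analysis.compound.Lucene43DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

final class CompoundFilterChoice {
  // Same gate as DictionaryCompoundWordTokenFilterFactory.create() above:
  // 4.4 and later get the Version-free filter, older match versions keep
  // the frozen 4.3 behavior for indexes built with earlier releases.
  static TokenStream wrap(TokenStream input, CharArraySet dictionary, Version matchVersion) {
    if (matchVersion.onOrAfter(Version.LUCENE_4_4)) {
      return new DictionaryCompoundWordTokenFilter(input, dictionary);
    }
    return new Lucene43DictionaryCompoundWordTokenFilter(input, dictionary,
        Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
  }
}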
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
index 909b3805e67..674bd813b26 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -17,67 +17,47 @@ package org.apache.lucene.analysis.compound;
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 import org.xml.sax.InputSource;
 
+import java.io.File;
+import java.io.IOException;
+
 /**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
- * 

+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages. + * * "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find * "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation * grammar and a word dictionary to achieve this. - *

- * You must specify the required {@link Version} compatibility when creating - * CompoundWordTokenFilterBase: - *

    - *
  • As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 - * supplementary characters in strings and char arrays provided as compound word - * dictionaries. - *
*/ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase { private HyphenationTree hyphenator; /** - * Creates a new {@link HyphenationCompoundWordTokenFilter} instance. - * - * @param matchVersion - * Lucene version to enable correct Unicode 4.0 behavior in the - * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details. + * Creates a new {@link HyphenationCompoundWordTokenFilter} instance. + * * @param input - * the {@link TokenStream} to process + * the {@link org.apache.lucene.analysis.TokenStream} to process * @param hyphenator * the hyphenation pattern tree to use for hyphenation * @param dictionary * the word dictionary to match against. */ - public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, - HyphenationTree hyphenator, CharArraySet dictionary) { - this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, + public HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator, CharArraySet dictionary) { + this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false); } /** * Creates a new {@link HyphenationCompoundWordTokenFilter} instance. - * - * @param matchVersion - * Lucene version to enable correct Unicode 4.0 behavior in the - * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details. + * * @param input - * the {@link TokenStream} to process + * the {@link org.apache.lucene.analysis.TokenStream} to process * @param hyphenator * the hyphenation pattern tree to use for hyphenation * @param dictionary @@ -91,10 +71,10 @@ public class HyphenationCompoundWordTokenFilter extends * @param onlyLongestMatch * Add only the longest matching subword to the stream */ - public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, - HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, - int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) { - super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, + public HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, + int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) { + super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); this.hyphenator = hyphenator; @@ -103,36 +83,36 @@ public class HyphenationCompoundWordTokenFilter extends /** * Create a HyphenationCompoundWordTokenFilter with no dictionary. *

- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean) + * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean) * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, * null, minWordSize, minSubwordSize, maxSubwordSize } */ - public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, - HyphenationTree hyphenator, int minWordSize, int minSubwordSize, - int maxSubwordSize) { - this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, + public HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator, int minWordSize, int minSubwordSize, + int maxSubwordSize) { + this(input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false); } - + /** * Create a HyphenationCompoundWordTokenFilter with no dictionary. *

- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int) - * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, + * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, int, int, int) + * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, * DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE } */ - public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, - HyphenationTree hyphenator) { - this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, + public HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator) { + this(input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE); } /** * Create a hyphenator tree - * + * * @param hyphenationFilename the filename of the XML grammar to load * @return An object representing the hyphenation patterns - * @throws IOException If there is a low-level I/O error. + * @throws java.io.IOException If there is a low-level I/O error. */ public static HyphenationTree getHyphenationTree(String hyphenationFilename) throws IOException { @@ -141,10 +121,10 @@ public class HyphenationCompoundWordTokenFilter extends /** * Create a hyphenator tree - * + * * @param hyphenationFile the file of the XML grammar to load * @return An object representing the hyphenation patterns - * @throws IOException If there is a low-level I/O error. + * @throws java.io.IOException If there is a low-level I/O error. */ public static HyphenationTree getHyphenationTree(File hyphenationFile) throws IOException { @@ -153,10 +133,10 @@ public class HyphenationCompoundWordTokenFilter extends /** * Create a hyphenator tree - * + * * @param hyphenationSource the InputSource pointing to the XML grammar * @return An object representing the hyphenation patterns - * @throws IOException If there is a low-level I/O error. + * @throws java.io.IOException If there is a low-level I/O error. */ public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java index e1295c73f6f..d7e9b3368a8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.compound; * limitations under the License. */ +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.util.CharArraySet; @@ -28,10 +29,12 @@ import org.apache.lucene.util.IOUtils; import java.util.Map; import java.io.IOException; import java.io.InputStream; + +import org.apache.lucene.util.Version; import org.xml.sax.InputSource; /** - * Factory for {@link HyphenationCompoundWordTokenFilter}. + * Factory for {@link Lucene43HyphenationCompoundWordTokenFilter}. *

* This factory accepts the following parameters: *

    @@ -55,7 +58,7 @@ import org.xml.sax.InputSource; * </analyzer> * </fieldType>
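For comparison, the same chain can be assembled programmatically. A minimal sketch of the post-change API; the grammar file name and dictionary entries are illustrative placeholders, not part of this patch:

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.Arrays;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
    import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;

    class CompoundChainSketch {
      static TokenStream build(String text) throws IOException {
        // Tokenizers no longer take a Version; input is supplied via setReader().
        Tokenizer tok = new WhitespaceTokenizer();
        tok.setReader(new StringReader(text));
        // "hyphenator.xml" and the dictionary words are hypothetical.
        HyphenationTree hyph =
            HyphenationCompoundWordTokenFilter.getHyphenationTree("hyphenator.xml");
        CharArraySet dict = new CharArraySet(Arrays.asList("rind", "fleisch", "etikett"), true);
        return new HyphenationCompoundWordTokenFilter(tok, hyph, dict);
      }
    }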
* - * @see HyphenationCompoundWordTokenFilter + * @see Lucene43HyphenationCompoundWordTokenFilter */ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { private CharArraySet dictionary; @@ -75,9 +78,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor dictFile = get(args, "dictionary"); encoding = get(args, "encoding"); hypFile = require(args, "hyphenator"); - minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE); - minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE); - maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE); + minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE); + minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE); + maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE); onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -96,14 +99,21 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor final InputSource is = new InputSource(stream); is.setEncoding(encoding); // if it's null let xml parser decide is.setSystemId(hypFile); - hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); + if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { + hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); + } else { + hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is); + } } finally { IOUtils.closeWhileHandlingException(stream); } } @Override - public HyphenationCompoundWordTokenFilter create(TokenStream input) { - return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); + public TokenFilter create(TokenStream input) { + if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { + return new HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); + } + return new Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java new file mode 100644 index 00000000000..e5b40703873 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java @@ -0,0 +1,162 @@ +package org.apache.lucene.analysis.compound; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.LinkedList; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.AttributeSource; + +/** + * Base class for decomposition token filters using pre-4.4 behavior. + *

+ * @deprecated Use {@link CompoundWordTokenFilterBase} + */ +@Deprecated +public abstract class Lucene43CompoundWordTokenFilterBase extends TokenFilter { + /** + * The default for minimal word length that gets decomposed + */ + public static final int DEFAULT_MIN_WORD_SIZE = 5; + + /** + * The default for minimal length of subwords that get propagated to the output of this filter + */ + public static final int DEFAULT_MIN_SUBWORD_SIZE = 2; + + /** + * The default for maximal length of subwords that get propagated to the output of this filter + */ + public static final int DEFAULT_MAX_SUBWORD_SIZE = 15; + + protected final CharArraySet dictionary; + protected final LinkedList tokens; + protected final int minWordSize; + protected final int minSubwordSize; + protected final int maxSubwordSize; + protected final boolean onlyLongestMatch; + + protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + + private AttributeSource.State current; + + protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) { + this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch); + } + + protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary) { + this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false); + } + + protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) { + super(input); + this.tokens=new LinkedList<>(); + if (minWordSize < 0) { + throw new IllegalArgumentException("minWordSize cannot be negative"); + } + this.minWordSize=minWordSize; + if (minSubwordSize < 0) { + throw new IllegalArgumentException("minSubwordSize cannot be negative"); + } + this.minSubwordSize=minSubwordSize; + if (maxSubwordSize < 0) { + throw new IllegalArgumentException("maxSubwordSize cannot be negative"); + } + this.maxSubwordSize=maxSubwordSize; + this.onlyLongestMatch=onlyLongestMatch; + this.dictionary = dictionary; + } + + @Override + public final boolean incrementToken() throws IOException { + if (!tokens.isEmpty()) { + assert current != null; + CompoundToken token = tokens.removeFirst(); + restoreState(current); // keep all other attributes untouched + termAtt.setEmpty().append(token.txt); + offsetAtt.setOffset(token.startOffset, token.endOffset); + posIncAtt.setPositionIncrement(0); + return true; + } + + current = null; // not really needed, but for safety + if (input.incrementToken()) { + // Only words longer than minWordSize get processed + if (termAtt.length() >= this.minWordSize) { + decompose(); + // only capture the state if we really need it for producing new tokens + if (!tokens.isEmpty()) { + current = captureState(); + } + } + // return original token: + return true; + } else { + return false; + } + } + + /** Decomposes the current {@link #termAtt} and places {@link CompoundToken} instances in the {@link #tokens} list. + * The original token may not be placed in the list, as it is automatically passed through this filter. 
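To make the decompose() contract described above concrete, a hypothetical subclass (not in this patch) that splits each long token into two halves could look like the sketch below; only the subwords go into the tokens list, never the original token:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.compound.Lucene43CompoundWordTokenFilterBase;

    final class HalvingCompoundFilter extends Lucene43CompoundWordTokenFilterBase {
      HalvingCompoundFilter(TokenStream in) {
        super(in, null); // this toy example needs no dictionary
      }
      @Override
      protected void decompose() {
        // Only called for tokens of at least minWordSize characters.
        final int len = termAtt.length();
        final int mid = len / 2;
        if (mid >= minSubwordSize && len - mid >= minSubwordSize) {
          tokens.add(new CompoundToken(0, mid));          // first half
          tokens.add(new CompoundToken(mid, len - mid));  // second half
        }
      }
    }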
+ */ + protected abstract void decompose(); + + @Override + public void reset() throws IOException { + super.reset(); + tokens.clear(); + current = null; + } + + /** + * Helper class to hold decompounded token information + */ + protected class CompoundToken { + public final CharSequence txt; + public final int startOffset, endOffset; + + /** Construct the compound token based on a slice of the current {@link Lucene43CompoundWordTokenFilterBase#termAtt}. */ + public CompoundToken(int offset, int length) { + this.txt = Lucene43CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); + + // offsets of the original word + int startOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); + int endOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); + + if (endOff - startOff != Lucene43CompoundWordTokenFilterBase.this.termAtt.length()) { + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. + this.startOffset = startOff; + this.endOffset = endOff; + } else { + final int newStart = startOff + offset; + this.startOffset = newStart; + this.endOffset = newStart + length; + } + } + + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java new file mode 100644 index 00000000000..ec856e1924c --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java @@ -0,0 +1,100 @@ +package org.apache.lucene.analysis.compound; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.CharArraySet; + +/** + * A {@link TokenFilter} that decomposes compound words found in many Germanic languages, using + * pre-4.4 behavior. + * @deprecated Use {@link DictionaryCompoundWordTokenFilter}. + */ +@Deprecated +public class Lucene43DictionaryCompoundWordTokenFilter extends Lucene43CompoundWordTokenFilterBase { + + /** + * Creates a new {@link Lucene43DictionaryCompoundWordTokenFilter} + * + * @param input + * the {@link TokenStream} to process + * @param dictionary + * the word dictionary to match against. 
+ */ + public Lucene43DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary) { + super(input, dictionary); + if (dictionary == null) { + throw new IllegalArgumentException("dictionary cannot be null"); + } + } + + /** + * Creates a new {@link Lucene43DictionaryCompoundWordTokenFilter} + * + * @param input + * the {@link TokenStream} to process + * @param dictionary + * the word dictionary to match against. + * @param minWordSize + * only words longer than this get processed + * @param minSubwordSize + * only subwords longer than this get to the output stream + * @param maxSubwordSize + * only subwords shorter than this get to the output stream + * @param onlyLongestMatch + * Add only the longest matching subword to the stream + */ + public Lucene43DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary, + int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) { + super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); + if (dictionary == null) { + throw new IllegalArgumentException("dictionary cannot be null"); + } + } + + @Override + protected void decompose() { + final int len = termAtt.length(); + for (int i=0;i<=len-this.minSubwordSize;++i) { + CompoundToken longestMatchToken=null; + for (int j=this.minSubwordSize;j<=this.maxSubwordSize;++j) { + if(i+j>len) { + break; + } + if(dictionary.contains(termAtt.buffer(), i, j)) { + if (this.onlyLongestMatch) { + if (longestMatchToken!=null) { + if (longestMatchToken.txt.length() + * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean) + * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, + * null, minWordSize, minSubwordSize, maxSubwordSize } + */ + public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator, int minWordSize, int minSubwordSize, + int maxSubwordSize) { + this(input, hyphenator, null, minWordSize, minSubwordSize, + maxSubwordSize, false); + } + + /** + * Create a HyphenationCompoundWordTokenFilter with no dictionary. + *

+ * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, int, int, int) + * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, + * DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE } + */ + public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input, + HyphenationTree hyphenator) { + this(input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, + DEFAULT_MAX_SUBWORD_SIZE); + } + + /** + * Create a hyphenator tree + * + * @param hyphenationFilename the filename of the XML grammar to load + * @return An object representing the hyphenation patterns + * @throws IOException If there is a low-level I/O error. + */ + public static HyphenationTree getHyphenationTree(String hyphenationFilename) + throws IOException { + return getHyphenationTree(new InputSource(hyphenationFilename)); + } + + /** + * Create a hyphenator tree + * + * @param hyphenationFile the file of the XML grammar to load + * @return An object representing the hyphenation patterns + * @throws IOException If there is a low-level I/O error. + */ + public static HyphenationTree getHyphenationTree(File hyphenationFile) + throws IOException { + return getHyphenationTree(new InputSource(hyphenationFile.toURI().toASCIIString())); + } + + /** + * Create a hyphenator tree + * + * @param hyphenationSource the InputSource pointing to the XML grammar + * @return An object representing the hyphenation patterns + * @throws IOException If there is a low-level I/O error. + */ + public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) + throws IOException { + HyphenationTree tree = new HyphenationTree(); + tree.loadPatterns(hyphenationSource); + return tree; + } + + @Override + protected void decompose() { + // get the hyphenation points + Hyphenation hyphens = hyphenator.hyphenate(termAtt.buffer(), 0, termAtt.length(), 1, 1); + // No hyphen points found -> exit + if (hyphens == null) { + return; + } + + final int[] hyp = hyphens.getHyphenationPoints(); + + for (int i = 0; i < hyp.length; ++i) { + int remaining = hyp.length - i; + int start = hyp[i]; + CompoundToken longestMatchToken = null; + for (int j = 1; j < remaining; j++) { + int partLength = hyp[i + j] - start; + + // if the part is longer than maxSubwordSize we + // are done with this round + if (partLength > this.maxSubwordSize) { + break; + } + + // we only put subwords to the token stream + // that are longer than minPartSize + if (partLength < this.minSubwordSize) { + // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the + // calculation above, and we rely upon minSubwordSize being >=0 to filter them out... 
+ continue; + } + + // check the dictionary + if (dictionary == null || dictionary.contains(termAtt.buffer(), start, partLength)) { + if (this.onlyLongestMatch) { + if (longestMatchToken != null) { + if (longestMatchToken.txt.length() < partLength) { + longestMatchToken = new CompoundToken(start, partLength); + } + } else { + longestMatchToken = new CompoundToken(start, partLength); + } + } else { + tokens.add(new CompoundToken(start, partLength)); + } + } else if (dictionary.contains(termAtt.buffer(), start, partLength - 1)) { + // check the dictionary again with a word that is one character + // shorter + // to avoid problems with genitive 's characters and other binding + // characters + if (this.onlyLongestMatch) { + if (longestMatchToken != null) { + if (longestMatchToken.txt.length() < partLength - 1) { + longestMatchToken = new CompoundToken(start, partLength - 1); + } + } else { + longestMatchToken = new CompoundToken(start, partLength - 1); + } + } else { + tokens.add(new CompoundToken(start, partLength - 1)); + } + } + } + if (this.onlyLongestMatch && longestMatchToken!=null) { + tokens.add(longestMatchToken); + } + } + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java index 888930f16ac..6002ea99309 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java @@ -17,8 +17,6 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ -import java.io.Reader; - import org.apache.lucene.analysis.Analyzer; /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java index 876a6160f73..9997d40155a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java @@ -18,13 +18,11 @@ package org.apache.lucene.analysis.core; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; /** * Emits the entire input as a single token. 
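Whichever decompounding filter is chosen, consuming the resulting stream follows the standard TokenStream workflow (reset, incrementToken, end, close); a minimal sketch:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class ConsumeSketch {
      static void printTokens(TokenStream ts) throws IOException {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                    // required before the first incrementToken()
        while (ts.incrementToken()) {
          // Decompounded subwords arrive at the same position as the original token.
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }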
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java index c29bcd50992..8c5588626f6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; -import java.io.Reader; import java.util.Map; /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java index e0437b3d467..5c0b6d2bcc9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.Version; /** * A LetterTokenizer is a tokenizer that divides text at non-letters. That's to @@ -30,41 +29,25 @@ import org.apache.lucene.util.Version; * Note: this does a decent job for most European languages, but does a terrible * job for some Asian languages, where words are not separated by spaces. *

- * You must specify the required {@link Version} compatibility when creating - * {@link LetterTokenizer}:

*/ public class LetterTokenizer extends CharTokenizer { /** * Construct a new LetterTokenizer. - * - * @param matchVersion - * Lucene version to match See {@link
above} */ - public LetterTokenizer(Version matchVersion) { - super(matchVersion); + public LetterTokenizer() { } /** * Construct a new LetterTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * - * @param matchVersion - * Lucene version to match See {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public LetterTokenizer(Version matchVersion, AttributeFactory factory) { - super(matchVersion, factory); + public LetterTokenizer(AttributeFactory factory) { + super(factory); } /** Collects only characters which satisfy diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java index 4a06f3127d8..11dae66d2b2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java @@ -36,7 +36,6 @@ public class LetterTokenizerFactory extends TokenizerFactory { /** Creates a new LetterTokenizerFactory */ public LetterTokenizerFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -44,6 +43,6 @@ public class LetterTokenizerFactory extends TokenizerFactory { @Override public LetterTokenizer create(AttributeFactory factory) { - return new LetterTokenizer(luceneMatchVersion, factory); + return new LetterTokenizer(factory); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java index 1b0ffa408f9..7a7e96898a9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java @@ -23,30 +23,21 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; /** * Normalizes token text to lower case. - * - *

- * You must specify the required {@link Version} - * compatibility when creating LowerCaseFilter: - * As of 3.1, supplementary characters are properly lowercased.
*/ public final class LowerCaseFilter extends TokenFilter { - private final CharacterUtils charUtils; + private final CharacterUtils charUtils = CharacterUtils.getInstance(); private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new LowerCaseFilter, that normalizes token text to lower case. * - * @param matchVersion See
above * @param in TokenStream to filter */ - public LowerCaseFilter(Version matchVersion, TokenStream in) { + public LowerCaseFilter(TokenStream in) { super(in); - charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java index 244722efcc0..ded2966292b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java @@ -40,7 +40,6 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT /** Creates a new LowerCaseFilterFactory */ public LowerCaseFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -48,7 +47,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT @Override public LowerCaseFilter create(TokenStream input) { - return new LowerCaseFilter(luceneMatchVersion,input); + return new LowerCaseFilter(input); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java index d61e1a938d9..66586f77154 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java @@ -17,13 +17,8 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ -import java.io.Reader; - import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Version; /** * LowerCaseTokenizer performs the function of LetterTokenizer @@ -35,41 +30,24 @@ import org.apache.lucene.util.Version; * Note: this does a decent job for most European languages, but does a terrible * job for some Asian languages, where words are not separated by spaces. *

- * You must specify the required {@link Version} compatibility when creating - * {@link LowerCaseTokenizer}:

*/ public final class LowerCaseTokenizer extends LetterTokenizer { /** * Construct a new LowerCaseTokenizer. - * - * @param matchVersion - * Lucene version to match See {@link
above} - * */ - public LowerCaseTokenizer(Version matchVersion) { - super(matchVersion); + public LowerCaseTokenizer() { } /** * Construct a new LowerCaseTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * - * @param matchVersion - * Lucene version to match See {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory) { - super(matchVersion, factory); + public LowerCaseTokenizer(AttributeFactory factory) { + super(factory); } /** Converts char to lower case diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java index 4af9a10484c..68b3c049722 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java @@ -39,7 +39,6 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi /** Creates a new LowerCaseTokenizerFactory */ public LowerCaseTokenizerFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -47,7 +46,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi @Override public LowerCaseTokenizer create(AttributeFactory factory) { - return new LowerCaseTokenizer(luceneMatchVersion, factory); + return new LowerCaseTokenizer(factory); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java index bc9a69b7f64..503b95ae72a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java @@ -17,38 +17,21 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ -import java.io.Reader; - import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.util.CharTokenizer; -import org.apache.lucene.util.Version; /** An {@link Analyzer} that filters {@link LetterTokenizer} * with {@link LowerCaseFilter} - *

- * You must specify the required {@link Version} compatibility - * when creating {@link CharTokenizer}:

**/ public final class SimpleAnalyzer extends Analyzer { - private final Version matchVersion; - /** * Creates a new {@link SimpleAnalyzer} - * @param matchVersion Lucene version to match See {@link above} */ - public SimpleAnalyzer(Version matchVersion) { - this.matchVersion = matchVersion; + public SimpleAnalyzer() { } @Override protected TokenStreamComponents createComponents(final String fieldName) { - return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion)); + return new TokenStreamComponents(new LowerCaseTokenizer()); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java index fe85bc82e7a..102618f84be 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java @@ -27,20 +27,10 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; -import org.apache.lucene.util.Version; - -/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. - * - * - *

- * You must specify the required {@link Version} - * compatibility when creating StopAnalyzer:

-*/ +/** + * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. + */ public final class StopAnalyzer extends StopwordAnalyzerBase { /** An unmodifiable set containing some common English words that are not usually useful @@ -55,40 +45,35 @@ public final class StopAnalyzer extends StopwordAnalyzerBase { "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" ); - final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, - stopWords, false); + final CharArraySet stopSet = new CharArraySet(stopWords, false); ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); } /** Builds an analyzer which removes words in * {@link #ENGLISH_STOP_WORDS_SET}. - * @param matchVersion See above */ - public StopAnalyzer(Version matchVersion) { - this(matchVersion, ENGLISH_STOP_WORDS_SET); + public StopAnalyzer() { + this(ENGLISH_STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given set. - * @param matchVersion See above * @param stopWords Set of stop words */ - public StopAnalyzer(Version matchVersion, CharArraySet stopWords) { - super(matchVersion, stopWords); + public StopAnalyzer(CharArraySet stopWords) { + super(stopWords); } /** Builds an analyzer with the stop words from the given file. - * @see WordlistLoader#getWordSet(Reader, Version) - * @param matchVersion See above + * @see WordlistLoader#getWordSet(Reader) * @param stopwordsFile File to load stop words from */ - public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException { - this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion)); + public StopAnalyzer(File stopwordsFile) throws IOException { + this(loadStopwordSet(stopwordsFile)); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader, Version) - * @param matchVersion See above + * @see WordlistLoader#getWordSet(Reader) * @param stopwords Reader to load stop words from */ - public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException { - this(matchVersion, loadStopwordSet(stopwords, matchVersion)); + public StopAnalyzer(Reader stopwords) throws IOException { + this(loadStopwordSet(stopwords)); } /** @@ -102,9 +87,8 @@ public final class StopAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new LowerCaseTokenizer(matchVersion); - return new TokenStreamComponents(source, new StopFilter(matchVersion, - source, stopwords)); + final Tokenizer source = new LowerCaseTokenizer(); + return new TokenStreamComponents(source, new StopFilter(source, stopwords)); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java index 536d253671b..2c3f000e25d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java @@ -24,19 +24,9 @@ import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Removes stop words from a token stream. - * - * - *

- * You must specify the required {@link Version} - * compatibility when creating StopFilter: - * As of 3.1, StopFilter correctly handles Unicode 4.0 - * supplementary characters in stopwords and position - * increments are preserved
*/ public final class StopFilter extends FilteringTokenFilter { @@ -47,17 +37,14 @@ public final class StopFilter extends FilteringTokenFilter { * Constructs a filter which removes words from the input TokenStream that are * named in the Set. * - * @param matchVersion - * Lucene version to enable correct Unicode 4.0 behavior in the stop - * set if Version > 3.0. See
above for details. * @param in * Input stream * @param stopWords * A {@link CharArraySet} representing the stopwords. - * @see #makeStopSet(Version, java.lang.String...) + * @see #makeStopSet(java.lang.String...) */ - public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) { - super(matchVersion, in); + public StopFilter(TokenStream in, CharArraySet stopWords) { + super(in); this.stopWords = stopWords; } @@ -67,12 +54,11 @@ public final class StopFilter extends FilteringTokenFilter { * This permits this stopWords construction to be cached once when * an Analyzer is constructed. * - * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords An array of stopwords - * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase + * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase */ - public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) { - return makeStopSet(matchVersion, stopWords, false); + public static CharArraySet makeStopSet(String... stopWords) { + return makeStopSet(stopWords, false); } /** @@ -81,38 +67,35 @@ public final class StopFilter extends FilteringTokenFilter { * This permits this stopWords construction to be cached once when * an Analyzer is constructed. * - * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords * @return A Set ({@link CharArraySet}) containing the words - * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase + * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase */ - public static CharArraySet makeStopSet(Version matchVersion, List stopWords) { - return makeStopSet(matchVersion, stopWords, false); + public static CharArraySet makeStopSet(List stopWords) { + return makeStopSet(stopWords, false); } /** * Creates a stopword set from the given stopword array. * - * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords An array of stopwords * @param ignoreCase If true, all words are lower cased first. * @return a Set containing the words */ - public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) { - CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase); + public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) { + CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase); stopSet.addAll(Arrays.asList(stopWords)); return stopSet; } /** * Creates a stopword set from the given stopword list. 
- * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords * @param ignoreCase if true, all words are lower cased first * @return A Set ({@link CharArraySet}) containing the words */ - public static CharArraySet makeStopSet(Version matchVersion, List stopWords, boolean ignoreCase){ - CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase); + public static CharArraySet makeStopSet(List stopWords, boolean ignoreCase){ + CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase); stopSet.addAll(stopWords); return stopSet; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java index 5e3c7e87fef..7bf32429b1f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java @@ -81,7 +81,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa /** Creates a new StopFilterFactory */ public StopFilterFactory(Map args) { super(args); - assureMatchVersion(); stopWordFiles = get(args, "words"); format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET)); ignoreCase = getBoolean(args, "ignoreCase", false); @@ -104,7 +103,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa if (null != format) { throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format); } - stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); + stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); } } @@ -118,7 +117,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa @Override public TokenStream create(TokenStream input) { - StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords); + StopFilter stopFilter = new StopFilter(input,stopWords); return stopFilter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java index 9c6bcbab744..d2791dfbf95 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java @@ -22,7 +22,6 @@ import java.util.Set; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.FilteringTokenFilter; -import org.apache.lucene.util.Version; /** * Removes tokens whose types appear in a set of blocked types from a token stream. @@ -35,14 +34,13 @@ public final class TypeTokenFilter extends FilteringTokenFilter { /** * Create a new {@link TypeTokenFilter}. 
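After this change the stop set is built and passed without a Version anywhere in the chain; a minimal sketch of the new StopFilter usage, with illustrative stopwords and input text:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;

    class StopFilterSketch {
      static TokenStream build() {
        CharArraySet stops = StopFilter.makeStopSet("the", "a", "an"); // no Version parameter
        Tokenizer tok = new WhitespaceTokenizer();
        tok.setReader(new StringReader("the quick fox"));
        return new StopFilter(tok, stops);
      }
    }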
- * @param version the Lucene match version * @param input the {@link TokenStream} to consume * @param stopTypes the types to filter * @param useWhiteList if true, then tokens whose type is in stopTypes will * be kept, otherwise they will be filtered out */ - public TypeTokenFilter(Version version, TokenStream input, Set stopTypes, boolean useWhiteList) { - super(version, input); + public TypeTokenFilter(TokenStream input, Set stopTypes, boolean useWhiteList) { + super(input); this.stopTypes = stopTypes; this.useWhiteList = useWhiteList; } @@ -50,10 +48,9 @@ public final class TypeTokenFilter extends FilteringTokenFilter { /** * Create a new {@link TypeTokenFilter} that filters tokens out * (useWhiteList=false). - * @see #TypeTokenFilter(Version, TokenStream, Set, boolean) */ - public TypeTokenFilter(Version version, TokenStream input, Set stopTypes) { - this(version, input, stopTypes, false); + public TypeTokenFilter(TokenStream input, Set stopTypes) { + this(input, stopTypes, false); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java index 0545d754133..089ef7adb83 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java @@ -72,7 +72,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour @Override public TokenStream create(TokenStream input) { - final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist); + final TokenStream filter = new TypeTokenFilter(input, stopTypes, useWhitelist); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java index 2625d4f5ebf..6fdae1b685a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java @@ -23,13 +23,9 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; /** * Normalizes token text to UPPER CASE. - * - *

- * You must specify the required {@link Version} - * compatibility when creating UpperCaseFilter * *

NOTE: In Unicode, this transformation may lose information when the * upper case character represents more than one lower case character. Use this filter @@ -37,18 +33,16 @@ import org.apache.lucene.util.Version; * general search matching */ public final class UpperCaseFilter extends TokenFilter { - private final CharacterUtils charUtils; + private final CharacterUtils charUtils = CharacterUtils.getInstance(); private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new UpperCaseFilter, that normalizes token text to upper case. * - * @param matchVersion See above * @param in TokenStream to filter */ - public UpperCaseFilter(Version matchVersion, TokenStream in) { + public UpperCaseFilter(TokenStream in) { super(in); - charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java index 60f1119405a..ac97ad7bd0a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java @@ -45,7 +45,6 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT /** Creates a new UpperCaseFilterFactory */ public UpperCaseFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -53,7 +52,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT @Override public UpperCaseFilter create(TokenStream input) { - return new UpperCaseFilter(luceneMatchVersion,input); + return new UpperCaseFilter(input); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java index 2fdc3f3dcc3..855f4f6a88c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java @@ -17,38 +17,21 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ -import java.io.Reader; - import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.util.CharTokenizer; -import org.apache.lucene.util.Version; /** * An Analyzer that uses {@link WhitespaceTokenizer}. - *

- * You must specify the required {@link Version} compatibility - * when creating {@link CharTokenizer}:

**/ public final class WhitespaceAnalyzer extends Analyzer { - private final Version matchVersion; - /** * Creates a new {@link WhitespaceAnalyzer} - * @param matchVersion Lucene version to match See {@link above} */ - public WhitespaceAnalyzer(Version matchVersion) { - this.matchVersion = matchVersion; + public WhitespaceAnalyzer() { } @Override protected TokenStreamComponents createComponents(final String fieldName) { - return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion)); + return new TokenStreamComponents(new WhitespaceTokenizer()); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java index 354322c444d..f38b07aed64 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java @@ -17,50 +17,31 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ -import java.io.Reader; - import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Version; /** * A WhitespaceTokenizer is a tokenizer that divides text at whitespace. - * Adjacent sequences of non-Whitespace characters form tokens. - *

- * You must specify the required {@link Version} compatibility when creating - * {@link WhitespaceTokenizer}:

+ * Adjacent sequences of non-Whitespace characters form tokens. */ public final class WhitespaceTokenizer extends CharTokenizer { /** - * Construct a new WhitespaceTokenizer. * @param matchVersion Lucene version - * to match See {@link above} - * + * Construct a new WhitespaceTokenizer. */ - public WhitespaceTokenizer(Version matchVersion) { - super(matchVersion); + public WhitespaceTokenizer() { } /** * Construct a new WhitespaceTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * - * @param - * matchVersion Lucene version to match See - * {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory) { - super(matchVersion, factory); + public WhitespaceTokenizer(AttributeFactory factory) { + super(factory); } /** Collects only characters which do not satisfy diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java index e23ee869665..708996362a7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; -import java.io.Reader; import java.util.Map; /** @@ -37,7 +36,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { /** Creates a new WhitespaceTokenizerFactory */ public WhitespaceTokenizerFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -45,6 +43,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { @Override public WhitespaceTokenizer create(AttributeFactory factory) { - return new WhitespaceTokenizer(luceneMatchVersion, factory); + return new WhitespaceTokenizer(factory); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java index b54739be60e..e8f49ef3616 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import java.io.*; import java.nio.charset.StandardCharsets; @@ -61,7 +60,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#"); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,34 +74,30 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}). 
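The analyzer constructors follow the same pattern: the no-argument form replaces the Version-taking one. A minimal sketch, with arbitrary field name and text:

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class AnalyzerSketch {
      static void demo() throws IOException {
        Analyzer a = new WhitespaceAnalyzer(); // was: new WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        try (TokenStream ts = a.tokenStream("body", "Hello World")) {
          CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
          ts.reset();
          while (ts.incrementToken()) {
            System.out.println(term);
          }
          ts.end();
        }
      }
    }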
- * - * @param matchVersion Lucene version to match */ - public CzechAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_SET); + public CzechAnalyzer() { + this(DefaultSetHolder.DEFAULT_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion Lucene version to match * @param stopwords a stopword set */ - public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public CzechAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words and a set of work to be * excluded from the {@link CzechStemFilter}. * - * @param matchVersion Lucene version to match * @param stopwords a stopword set * @param stemExclusionTable a stemming exclusion set */ - public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) { - super(matchVersion, stopwords); - this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); + public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) { + super(stopwords); + this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable)); } /** @@ -115,16 +110,16 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} * , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If * a stem exclusion set is provided via - * {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a + * {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a * {@link SetKeywordMarkerFilter} is added before * {@link CzechStemFilter}. */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter( matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!this.stemExclusionTable.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionTable); result = new CzechStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java index 00f7520af9f..7f2720addc2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.DanishStemmer; /** @@ -64,7 +63,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // 
distribution (JAR) @@ -76,18 +75,17 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public DanishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public DanishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public DanishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -95,14 +93,12 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public DanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -119,10 +115,10 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new DanishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java index 6cab61ea1f4..f2d29b4385f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java @@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for German language. @@ -48,7 +47,7 @@ import org.apache.lucene.util.Version; * exclusion list is empty by default. *

* - * NOTE: This class uses the same {@link Version} + * NOTE: This class uses the same {@link org.apache.lucene.util.Version} * dependent settings as {@link StandardAnalyzer}.

*/ public final class GermanAnalyzer extends StopwordAnalyzerBase { @@ -69,7 +68,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -91,35 +90,31 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { * Builds an analyzer with the default stop words: * {@link #getDefaultStopSet()}. */ - public GermanAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_SET); + public GermanAnalyzer() { + this(DefaultSetHolder.DEFAULT_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set */ - public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public GermanAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet * a stemming exclusion set */ - public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); + public GermanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -135,10 +130,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter( matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); result = new SetKeywordMarkerFilter(result, exclusionSet); result = new GermanNormalizationFilter(result); result = new GermanLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java index 4f418ee183f..c80c27200b8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java @@ -28,7 +28,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for the Greek language. @@ -38,7 +37,7 @@ import org.apache.lucene.util.Version; * A default set of stopwords is used unless an alternative list is specified. *

* - * NOTE: This class uses the same {@link Version} + * NOTE: This class uses the same {@link org.apache.lucene.util.Version} * dependent settings as {@link StandardAnalyzer}.

*/ public final class GreekAnalyzer extends StopwordAnalyzerBase { @@ -69,10 +68,9 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words. - * @param matchVersion Lucene compatibility version */ - public GreekAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_SET); + public GreekAnalyzer() { + this(DefaultSetHolder.DEFAULT_SET); } /** @@ -81,11 +79,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { * NOTE: The stopwords set should be pre-processed with the logic of * {@link GreekLowerCaseFilter} for best results. * - * @param matchVersion Lucene compatibility version * @param stopwords a stopword set */ - public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) { - super(matchVersion, stopwords); + public GreekAnalyzer(CharArraySet stopwords) { + super(stopwords); } /** @@ -100,10 +97,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new GreekLowerCaseFilter(matchVersion, source); - result = new StandardFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new GreekLowerCaseFilter(source); + result = new StandardFilter(result); + result = new StopFilter(result, stopwords); result = new GreekStemFilter(result); return new TokenStreamComponents(source, result); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java index ba0a20ac29e..66d4aa6a602 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java @@ -22,32 +22,22 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; /** * Normalizes token text to lower case, removes some Greek diacritics, * and standardizes final sigma to sigma. - * - *

You must specify the required {@link Version} - * compatibility when creating GreekLowerCaseFilter: - * - * As of 3.1, supplementary characters are properly lowercased. - *
*/ public final class GreekLowerCaseFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final CharacterUtils charUtils; + private final CharacterUtils charUtils = CharacterUtils.getInstance(); /** * Create a GreekLowerCaseFilter that normalizes Greek token text. * - * @param matchVersion Lucene compatibility version, - * See
above * @param in TokenStream to filter */ - public GreekLowerCaseFilter(Version matchVersion, TokenStream in) { + public GreekLowerCaseFilter(TokenStream in) { super(in); - this.charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java index 15b6f9251c1..5ff0c90f63e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java @@ -40,7 +40,6 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M /** Creates a new GreekLowerCaseFilterFactory */ public GreekLowerCaseFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -48,7 +47,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M @Override public GreekLowerCaseFilter create(TokenStream in) { - return new GreekLowerCaseFilter(luceneMatchVersion, in); + return new GreekLowerCaseFilter(in); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java index f714e54c3d0..750bd3589ae 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java @@ -1,7 +1,6 @@ package org.apache.lucene.analysis.el; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; import java.util.Arrays; @@ -205,7 +204,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc4 = new CharArraySet( Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"), false); @@ -231,7 +230,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc6 = new CharArraySet( Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ", "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ", "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ", @@ -256,7 +255,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc7 = new CharArraySet( Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ", "πεθ", "πικρ", "ποτ", "σιχ", "χ"), false); @@ -283,11 +282,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc8a = new CharArraySet( Arrays.asList("τρ", "τσ"), false); - private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc8b = new CharArraySet( Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ", "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ", "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ", @@ -346,7 +345,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc9 = new 
CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc9 = new CharArraySet( Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ", "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ", "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), @@ -434,11 +433,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc12a = new CharArraySet( Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"), false); - private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc12b = new CharArraySet( Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"), false); @@ -458,7 +457,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc13 = new CharArraySet( Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"), false); @@ -492,7 +491,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc14 = new CharArraySet( Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ", "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ", "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε", @@ -530,7 +529,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc15a = new CharArraySet( Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ", "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ", "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ", @@ -539,7 +538,7 @@ public class GreekStemmer { "ουλαμ", "ουρ", "π", "τρ", "μ"), false); - private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc15b = new CharArraySet( Arrays.asList("ψοφ", "ναυλοχ"), false); @@ -576,7 +575,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc16 = new CharArraySet( Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"), false); @@ -596,7 +595,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc17 = new CharArraySet( Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"), false); @@ -610,7 +609,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc18 = new CharArraySet( Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"), false); @@ -634,7 +633,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT, + private static final CharArraySet exc19 = new CharArraySet( Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"), false); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java index 934540a3ec9..15bfb51a518 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java @@ -30,7 +30,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for English. @@ -57,18 +56,17 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. */ - public EnglishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public EnglishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public EnglishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -76,14 +74,12 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -101,11 +97,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new EnglishPossessiveFilter(matchVersion, result); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new EnglishPossessiveFilter(result); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PorterStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java index 9f6f21884d2..e4e03a1c1b3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; /** * TokenFilter that removes possessives (trailing 's) from words. @@ -30,8 +29,7 @@ import org.apache.lucene.util.Version; public final class EnglishPossessiveFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - // NOTE: version now unused - public EnglishPossessiveFilter(Version version, TokenStream input) { + public EnglishPossessiveFilter(TokenStream input) { super(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java index f1685a7941b..40f1d30751d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java @@ -39,7 +39,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory { /** Creates a new EnglishPossessiveFilterFactory */ public EnglishPossessiveFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -47,6 +46,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory { @Override public TokenStream create(TokenStream input) { - return new EnglishPossessiveFilter(luceneMatchVersion, input); + return new EnglishPossessiveFilter(input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java index cdb397b93bf..b4d68a5f797 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java @@ -64,7 +64,6 @@ import org.apache.lucene.analysis.util.OpenStringBuilder; *

Copyright: Copyright 2008, Lucid Imagination, Inc. * Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu)

*/ -import org.apache.lucene.util.Version; /** * This class implements the Kstem algorithm @@ -280,7 +279,7 @@ public class KStemmer { DictEntry defaultEntry; DictEntry entry; - CharArrayMap d = new CharArrayMap<>(Version.LUCENE_CURRENT, 1000, false); + CharArrayMap d = new CharArrayMap<>(1000, false); for (int i = 0; i < exceptionWords.length; i++) { if (!d.containsKey(exceptionWords[i])) { entry = new DictEntry(exceptionWords[i], true); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java index 2ce1965af61..3c2812bbd2d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Spanish. @@ -63,7 +62,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,18 +74,17 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public SpanishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public SpanishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public SpanishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -94,14 +92,12 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
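The GreekStemmer, Stemmer, and KStemmer hunks above show the same change on the data-structure side: CharArraySet and CharArrayMap lose their leading Version argument. A small sketch of the resulting construction API (the contents and sizes are illustrative):

    import java.util.Arrays;

    import org.apache.lucene.analysis.util.CharArrayMap;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class CharArrayStructures {
      public static void main(String[] args) {
        // formerly new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(...), false)
        CharArraySet set = new CharArraySet(Arrays.asList("foo", "bar"), false);
        // formerly new CharArrayMap<>(Version.LUCENE_CURRENT, 16, true)
        CharArrayMap<Integer> map = new CharArrayMap<>(16, true); // ignoreCase = true
        map.put("Stem", 42);
        System.out.println(set.contains("foo")); // true
        System.out.println(map.get("stem"));     // 42, case-insensitive lookup
      }
    }
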
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -118,10 +114,10 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SpanishLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java index 12bb7a3ef5d..4222e5a0998 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java @@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.BasqueStemmer; /** @@ -73,18 +72,17 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public BasqueAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public BasqueAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public BasqueAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -92,14 +90,12 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
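On the analyzer side, the caller-facing pattern is the same throughout this patch: drop the leading Version argument and pass only the sets. For example, with the SpanishAnalyzer converted above (the word lists here are illustrative):

    import java.util.Arrays;

    import org.apache.lucene.analysis.es.SpanishAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class CustomSpanishAnalyzer {
      public static void main(String[] args) {
        CharArraySet stopwords = new CharArraySet(Arrays.asList("el", "la", "de"), true);
        CharArraySet stemExclusions = new CharArraySet(Arrays.asList("lucene"), true);
        // formerly new SpanishAnalyzer(matchVersion, stopwords, stemExclusions)
        SpanishAnalyzer analyzer = new SpanishAnalyzer(stopwords, stemExclusions);
        analyzer.close();
      }
    }
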
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -116,10 +112,10 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new BasqueStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java index 1f1b4b2bdb0..df9c2fb5c96 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Persian. @@ -87,20 +86,18 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. */ - public PersianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public PersianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set */ - public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){ - super(matchVersion, stopwords); + public PersianAnalyzer(CharArraySet stopwords){ + super(stopwords); } /** @@ -115,8 +112,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new LowerCaseFilter(matchVersion, source); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new LowerCaseFilter(source); result = new ArabicNormalizationFilter(result); /* additional persian-specific normalization */ result = new PersianNormalizationFilter(result); @@ -124,7 +121,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { * the order here is important: the stopword list is normalized with the * above! 
*/ - return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords)); + return new TokenStreamComponents(source, new StopFilter(result, stopwords)); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java index 5f824429772..84a3c4ffd5f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.FinnishStemmer; /** @@ -64,7 +63,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -76,18 +75,17 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public FinnishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public FinnishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public FinnishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -95,14 +93,12 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
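Nearly every createComponents in this patch reduces to the same version-free chain: StandardTokenizer, StandardFilter, LowerCaseFilter, StopFilter. A hypothetical custom analyzer written against the new API follows the identical shape (MyAnalyzer and the choice of stop set are illustrative, not part of this patch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.StopAnalyzer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.standard.StandardFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    public final class MyAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        final Tokenizer source = new StandardTokenizer();          // no Version
        TokenStream result = new StandardFilter(source);           // no Version
        result = new LowerCaseFilter(result);                      // no Version
        result = new StopFilter(result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        return new TokenStreamComponents(source, result);
      }
    }
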
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -119,10 +115,10 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new FinnishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java index b86fb80cb86..f0acba32e48 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java @@ -32,7 +32,6 @@ import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -49,7 +48,7 @@ import java.util.Arrays; * exclusion list is empty by default. *

* - * NOTE: This class uses the same {@link Version} + * NOTE: This class uses the same {@link org.apache.lucene.util.Version} * dependent settings as {@link StandardAnalyzer}.

*/ public final class FrenchAnalyzer extends StopwordAnalyzerBase { @@ -59,7 +58,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { /** Default set of articles for ElisionFilter */ public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( + new CharArraySet(Arrays.asList( "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true)); /** @@ -80,7 +79,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -92,37 +91,33 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}). */ - public FrenchAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public FrenchAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set */ - public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){ - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public FrenchAnalyzer(CharArraySet stopwords){ + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set * @param stemExclutionSet * a stemming exclusion set */ - public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, + public FrenchAnalyzer(CharArraySet stopwords, CharArraySet stemExclutionSet) { - super(matchVersion, stopwords); + super(stopwords); this.excltable = CharArraySet.unmodifiableSet(CharArraySet - .copy(matchVersion, stemExclutionSet)); + .copy(stemExclutionSet)); } /** @@ -139,11 +134,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); result = new ElisionFilter(result, DEFAULT_ARTICLES); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!excltable.isEmpty()) result = new SetKeywordMarkerFilter(result, excltable); result = new FrenchLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java index 089e123845b..00413d55cf1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java @@ -32,7 +32,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.ElisionFilter; import 
org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.IrishStemmer; /** @@ -45,7 +44,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet(Version.LUCENE_CURRENT, + new CharArraySet( Arrays.asList( "d", "m", "b" ), true)); @@ -56,7 +55,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { * with phrase queries versus tAthair (which would not have a gap). */ private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet( - new CharArraySet(Version.LUCENE_CURRENT, + new CharArraySet( Arrays.asList( "h", "n", "t" ), true)); @@ -91,18 +90,17 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public IrishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public IrishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public IrishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -110,14 +108,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -134,12 +130,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new StopFilter(matchVersion, result, HYPHENATIONS); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new StopFilter(result, HYPHENATIONS); result = new ElisionFilter(result, DEFAULT_ARTICLES); result = new IrishLowerCaseFilter(result); - result = new StopFilter(matchVersion, result, stopwords); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new IrishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java index a40276ff6de..b79245ba15e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java @@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Galician. @@ -62,7 +61,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,18 +73,17 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public GalicianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public GalicianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public GalicianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,14 +91,12 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -117,10 +113,10 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new GalicianStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java index 1edd0e8030e..4ee31f13ddb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer; import 
org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.in.IndicNormalizationFilter; -import org.apache.lucene.util.Version; /** * Analyzer for Hindi. @@ -75,32 +74,29 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the given stop words * - * @param version lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a stemming exclusion set */ - public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(version, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet( - CharArraySet.copy(matchVersion, stemExclusionSet)); + public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** * Builds an analyzer with the given stop words * - * @param version lucene compatibility version * @param stopwords a stopword set */ - public HindiAnalyzer(Version version, CharArraySet stopwords) { - this(version, stopwords, CharArraySet.EMPTY_SET); + public HindiAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. */ - public HindiAnalyzer(Version version) { - this(version, DefaultSetHolder.DEFAULT_STOP_SET); + public HindiAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** @@ -117,13 +113,13 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new LowerCaseFilter(matchVersion, source); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new LowerCaseFilter(source); if (!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new IndicNormalizationFilter(result); result = new HindiNormalizationFilter(result); - result = new StopFilter(matchVersion, result, stopwords); + result = new StopFilter(result, stopwords); result = new HindiStemFilter(result); return new TokenStreamComponents(source, result); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java index d2addb81747..8784e3bbb31 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.HungarianStemmer; /** @@ -64,7 +63,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -76,18 +75,17 @@ public final class HungarianAnalyzer 
extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public HungarianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public HungarianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public HungarianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -95,14 +93,12 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -119,10 +115,10 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new HungarianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index ef4b26d2212..c5306148788 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -28,7 +28,6 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Outputs; @@ -215,7 +214,7 @@ final class Stemmer { if (stems.size() < 2) { return stems; } - CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase); + CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase); List deduped = new ArrayList<>(); for (CharsRef s : stems) { if (!terms.contains(s)) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java index 0f5065954ad..ae22c47d8b4 100644 --- 
a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java @@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.ArmenianStemmer; /** @@ -73,18 +72,17 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public ArmenianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public ArmenianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public ArmenianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -92,14 +90,12 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -116,10 +112,10 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new ArmenianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java index 85bd081e7a4..d54b3609597 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; /** * Analyzer for Indonesian (Bahasa) @@ -69,20 +68,18 @@ 
public final class IndonesianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public IndonesianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public IndonesianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set */ - public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){ - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public IndonesianAnalyzer(CharArraySet stopwords){ + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -90,17 +87,14 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * {@link IndonesianStemFilter}. * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet * a set of terms not to be stemmed */ - public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){ - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){ + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -116,10 +110,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if (!stemExclusionSet.isEmpty()) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java index 382bfaef9c8..afae44def4c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java @@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Italian. 
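The analyzer rewrite above repeats mechanically across every language module in this patch: the leading Version parameter is dropped from each constructor and from the CharArraySet.copy() call. A minimal before/after sketch of caller code (hypothetical usage, not part of the patch; the stopword set is illustrative and this assumes the version-free CharArraySet constructor introduced by this same change):

    import java.util.Arrays;
    import org.apache.lucene.analysis.id.IndonesianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class AnalyzerMigrationSketch {
      public static void main(String[] args) {
        // Before LUCENE-5859: new IndonesianAnalyzer(Version.LUCENE_4_9, stopwords);
        // After: no compatibility version is passed at all.
        CharArraySet stopwords = new CharArraySet(Arrays.asList("yang", "dan"), /*ignoreCase=*/false);
        IndonesianAnalyzer analyzer = new IndonesianAnalyzer(stopwords);
        analyzer.close();
      }
    }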
@@ -48,7 +47,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt"; private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet(Version.LUCENE_CURRENT, + new CharArraySet( Arrays.asList( "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" @@ -72,7 +71,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -84,18 +83,17 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public ItalianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public ItalianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public ItalianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -103,14 +101,12 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -127,11 +123,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); result = new ElisionFilter(result, DEFAULT_ARTICLES); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new ItalianLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java index c6b80ed756b..0d858428cac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java @@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Latvian. @@ -62,7 +61,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,18 +73,17 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public LatvianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public LatvianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public LatvianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,14 +91,12 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -117,10 +113,10 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new LatvianStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java index 4c9743caf2b..986994e5121 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java @@ -82,7 +82,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory { boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false); Set k = getSet(args, KEEP); if (k != null) { - keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase); + keep = new CharArraySet(10, ignoreCase); keep.addAll(k); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java index 5f501e06cb2..8e1726fb5ce 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; /** * Removes words that are too long or too short from the stream. @@ -39,13 +38,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter { * Create a new {@link CodepointCountFilter}. This will filter out tokens whose * {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)} * < min) or too long ({@link Character#codePointCount(char[], int, int)} > max). 
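The supporting collections lose their Version parameter as well: CharArraySet and CharArrayMap are now created with just a capacity and an ignoreCase flag, as the Stemmer and CapitalizationFilterFactory hunks above show. A small sketch of the version-free constructors (capacities and contents illustrative):

    import org.apache.lucene.analysis.util.CharArrayMap;
    import org.apache.lucene.analysis.util.CharArraySet;

    class CharArrayCollectionsSketch {
      static void demo() {
        CharArraySet keep = new CharArraySet(10, /*ignoreCase=*/true);
        keep.add("NGram");
        boolean hit = keep.contains("ngram");     // true: matching ignores case

        CharArrayMap<String> dict = new CharArrayMap<>(4, /*ignoreCase=*/false);
        dict.put("fiets", "fiets");               // same (capacity, ignoreCase) shape
      }
    }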
- * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param min the minimum length * @param max the maximum length */ - public CodepointCountFilter(Version version, TokenStream in, int min, int max) { - super(version, in); + public CodepointCountFilter(TokenStream in, int min, int max) { + super(in); if (min < 0) { throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java index 54250641d32..d42d7f6452f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java @@ -50,6 +50,6 @@ public class CodepointCountFilterFactory extends TokenFilterFactory { @Override public CodepointCountFilter create(TokenStream input) { - return new CodepointCountFilter(luceneMatchVersion, input, min, max); + return new CodepointCountFilter(input, min, max); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java index c77e3a7614b..093d22b60b9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * A TokenFilter that only keeps tokens with text contained in the @@ -37,12 +36,11 @@ public final class KeepWordFilter extends FilteringTokenFilter { * Create a new {@link KeepWordFilter}. *

NOTE: The words set passed to this constructor will be directly * used by this filter and should not be modified. - * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param words the words to keep */ - public KeepWordFilter(Version version, TokenStream in, CharArraySet words) { - super(version, in); + public KeepWordFilter(TokenStream in, CharArraySet words) { + super(in); this.words = words; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java index 78c831bc8ce..7d4c24e0968 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java @@ -44,7 +44,6 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc /** Creates a new KeepWordFilterFactory */ public KeepWordFilterFactory(Map args) { super(args); - assureMatchVersion(); wordFiles = get(args, "words"); ignoreCase = getBoolean(args, "ignoreCase", false); if (!args.isEmpty()) { @@ -73,7 +72,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc if (words == null) { return input; } else { - final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words); + final TokenStream filter = new KeepWordFilter(input, words); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java index f35afc68b6f..bd7e2232023 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; /** * Removes words that are too long or too short from the stream. @@ -39,13 +38,12 @@ public final class LengthFilter extends FilteringTokenFilter { * Create a new {@link LengthFilter}. This will filter out tokens whose * {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()} * < min) or too long ({@link CharTermAttribute#length()} > max). 
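KeepWordFilter, LengthFilter and CodepointCountFilter all extend FilteringTokenFilter, so once its super(in) constructor loses the Version argument the whole family follows. A hypothetical chain built on the version-free constructors from this patch (the tokenizer choice and word list are illustrative):

    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
    import org.apache.lucene.analysis.miscellaneous.LengthFilter;
    import org.apache.lucene.analysis.util.CharArraySet;

    class FilterChainSketch {
      static TokenStream build() {
        WhitespaceTokenizer source = new WhitespaceTokenizer();   // Version argument gone here too
        TokenStream result = new LengthFilter(source, 2, 20);     // drop tokens outside 2..20 chars
        CharArraySet keep = new CharArraySet(Arrays.asList("lucene", "analysis"), true);
        return new KeepWordFilter(result, keep);                  // keep only the whitelisted words
      }
    }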
- * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param min the minimum length * @param max the maximum length */ - public LengthFilter(Version version, TokenStream in, int min, int max) { - super(version, in); + public LengthFilter(TokenStream in, int min, int max) { + super(in); if (min < 0) { throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java index 6d63623e0fb..476f37543ea 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java @@ -50,7 +50,7 @@ public class LengthFilterFactory extends TokenFilterFactory { @Override public LengthFilter create(TokenStream input) { - final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max); + final LengthFilter filter = new LengthFilter(input,min,max); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java index e3c7a033bdb..a1785abcf73 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java @@ -22,7 +22,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; import java.io.IOException; @@ -34,8 +33,7 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); - // use a fixed version, as we don't care about case sensitivity. - private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false); + private final CharArraySet previous = new CharArraySet(8, false); /** * Creates a new RemoveDuplicatesTokenFilter diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java index 6dadf820933..20803202c77 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java @@ -20,15 +20,11 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.util.Version; import java.io.IOException; /** * Trims leading and trailing whitespace from Tokens in the stream. - *

As of Lucene 4.4, this filter does not support updateOffsets=true anymore - * as it can lead to broken token streams. */ public final class TrimFilter extends TokenFilter { @@ -36,10 +32,9 @@ public final class TrimFilter extends TokenFilter { /** * Create a new {@link TrimFilter}. - * @param version the Lucene match version * @param in the stream to consume */ - public TrimFilter(Version version, TokenStream in) { + public TrimFilter(TokenStream in) { super(in); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java index c21233119cd..58c400b9bd6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java @@ -47,7 +47,7 @@ public class TrimFilterFactory extends TokenFilterFactory { @Override public TrimFilter create(TokenStream input) { - final TrimFilter filter = new TrimFilter(luceneMatchVersion, input); + final TrimFilter filter = new TrimFilter(input); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java index f93c0a7e773..e158910035a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java @@ -30,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.Version; import java.io.IOException; import java.util.Arrays; @@ -206,11 +205,8 @@ public final class WordDelimiterFilter extends TokenFilter { * @param configurationFlags Flags configuring the filter * @param protWords If not null is the set of tokens to protect from being delimited */ - public WordDelimiterFilter(Version matchVersion, TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) { + public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) { super(in); - if (!matchVersion.onOrAfter(Version.LUCENE_4_8)) { - throw new IllegalArgumentException("This class only works with Lucene 4.8+. 
To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter"); - } this.flags = configurationFlags; this.protWords = protWords; this.iterator = new WordDelimiterIterator( @@ -225,8 +221,8 @@ public final class WordDelimiterFilter extends TokenFilter { * @param configurationFlags Flags configuring the filter * @param protWords If not null is the set of tokens to protect from being delimited */ - public WordDelimiterFilter(Version matchVersion, TokenStream in, int configurationFlags, CharArraySet protWords) { - this(matchVersion, in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords); + public WordDelimiterFilter(TokenStream in, int configurationFlags, CharArraySet protWords) { + this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java index c5c478da771..780b68e1e9a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java @@ -119,7 +119,7 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re @Override public TokenFilter create(TokenStream input) { if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8)) { - return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, + return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, flags, protectedWords); } else { return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java index f4647249d08..7e4a063fdba 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java @@ -18,8 +18,11 @@ package org.apache.lucene.analysis.ngram; */ import java.util.Map; + +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.util.Version; /** * Creates new instances of {@link EdgeNGramTokenFilter}. 
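With the filters themselves version-free, back-compat dispatch moves into the factories: each factory reads luceneMatchVersion from its args once and hands back either the current implementation or a frozen Lucene43/Lucene47 copy, as the create() hunks around here show. A hypothetical configuration that would select the legacy path (parameter values illustrative):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;

    class LegacyFactorySketch {
      static EdgeNGramFilterFactory legacyEdgeNGrams() {
        Map<String, String> args = new HashMap<>();
        args.put("luceneMatchVersion", "4.3");  // before 4.4, so create() returns Lucene43EdgeNGramTokenFilter
        args.put("minGramSize", "1");
        args.put("maxGramSize", "2");
        return new EdgeNGramFilterFactory(args);
      }
    }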
@@ -46,7 +49,10 @@ public class EdgeNGramFilterFactory extends TokenFilterFactory { } @Override - public EdgeNGramTokenFilter create(TokenStream input) { - return new EdgeNGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize); + public TokenFilter create(TokenStream input) { + if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { + return new EdgeNGramTokenFilter(input, minGramSize, maxGramSize); + } + return new Lucene43EdgeNGramTokenFilter(input, minGramSize, maxGramSize); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java index 20fda83c449..219d4ca8d15 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; /** * Tokenizes the given token into n-grams of given size(s). @@ -59,18 +58,13 @@ public final class EdgeNGramTokenFilter extends TokenFilter { /** * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range * - * @param version the Lucene match version * @param input {@link TokenStream} holding the input to be tokenized * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public EdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) { + public EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) { super(input); - if (version == null) { - throw new IllegalArgumentException("version must not be null"); - } - if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } @@ -79,9 +73,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter { throw new IllegalArgumentException("minGram must not be greater than maxGram"); } - this.charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? CharacterUtils.getInstance(version) - : CharacterUtils.getJava4Instance(); + this.charUtils = CharacterUtils.getInstance(); this.minGram = minGram; this.maxGram = maxGram; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java index 8b4d42fc88d..9e277abcd63 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java @@ -17,8 +17,6 @@ package org.apache.lucene.analysis.ngram; * limitations under the License. 
*/ -import java.io.Reader; - import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.Version; @@ -38,24 +36,22 @@ public class EdgeNGramTokenizer extends NGramTokenizer { /** * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range * - * @param version the Lucene match version * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public EdgeNGramTokenizer(Version version, int minGram, int maxGram) { - super(version, minGram, maxGram, true); + public EdgeNGramTokenizer(int minGram, int maxGram) { + super(minGram, maxGram, true); } /** * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range * - * @param version the Lucene match version * @param factory {@link org.apache.lucene.util.AttributeFactory} to use * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public EdgeNGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram) { - super(version, factory, minGram, maxGram, true); + public EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) { + super(factory, minGram, maxGram, true); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java index 2990513f597..9772d3c98fe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java @@ -17,8 +17,10 @@ package org.apache.lucene.analysis.ngram; * limitations under the License. */ +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.Version; import java.io.Reader; import java.util.Map; @@ -47,7 +49,10 @@ public class EdgeNGramTokenizerFactory extends TokenizerFactory { } @Override - public EdgeNGramTokenizer create(AttributeFactory factory) { - return new EdgeNGramTokenizer(luceneMatchVersion, factory, minGramSize, maxGramSize); + public Tokenizer create(AttributeFactory factory) { + if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { + return new EdgeNGramTokenizer(factory, minGramSize, maxGramSize); + } + return new Lucene43EdgeNGramTokenizer(factory, minGramSize, maxGramSize); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java new file mode 100644 index 00000000000..d465ce9e1a1 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java @@ -0,0 +1,126 @@ +package org.apache.lucene.analysis.ngram; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.util.CharacterUtils; + +import java.io.IOException; + +/** + * Tokenizes the given token into n-grams of given size(s), using pre-4.4 behavior. + * + * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter}. + */ +@Deprecated +public final class Lucene43EdgeNGramTokenFilter extends TokenFilter { + public static final int DEFAULT_MAX_GRAM_SIZE = 1; + public static final int DEFAULT_MIN_GRAM_SIZE = 1; + + private final CharacterUtils charUtils; + private final int minGram; + private final int maxGram; + private char[] curTermBuffer; + private int curTermLength; + private int curCodePointCount; + private int curGramSize; + private int tokStart; + private int tokEnd; // only used if the length changed before this filter + private int savePosIncr; + private int savePosLen; + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); + + /** + * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range + * + * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized + * @param minGram the smallest n-gram to generate + * @param maxGram the largest n-gram to generate + */ + public Lucene43EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) { + super(input); + + if (minGram < 1) { + throw new IllegalArgumentException("minGram must be greater than zero"); + } + + if (minGram > maxGram) { + throw new IllegalArgumentException("minGram must not be greater than maxGram"); + } + + this.charUtils = CharacterUtils.getJava4Instance(); + this.minGram = minGram; + this.maxGram = maxGram; + } + + @Override + public final boolean incrementToken() throws IOException { + while (true) { + if (curTermBuffer == null) { + if (!input.incrementToken()) { + return false; + } else { + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); + curCodePointCount = charUtils.codePointCount(termAtt); + curGramSize = minGram; + tokStart = offsetAtt.startOffset(); + tokEnd = offsetAtt.endOffset(); + savePosIncr += posIncrAtt.getPositionIncrement(); + savePosLen = posLenAtt.getPositionLength(); + } + } + if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit + if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams + // grab gramSize chars from front or back + clearAttributes(); + 
offsetAtt.setOffset(tokStart, tokEnd); + // first ngram gets increment, others don't + if (curGramSize == minGram) { + posIncrAtt.setPositionIncrement(savePosIncr); + savePosIncr = 0; + } else { + posIncrAtt.setPositionIncrement(0); + } + posLenAtt.setPositionLength(savePosLen); + final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize); + termAtt.copyBuffer(curTermBuffer, 0, charLength); + curGramSize++; + return true; + } + } + curTermBuffer = null; + } + } + + @Override + public void reset() throws IOException { + super.reset(); + curTermBuffer = null; + savePosIncr = 0; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java new file mode 100644 index 00000000000..5bb12d402cc --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java @@ -0,0 +1,53 @@ +package org.apache.lucene.analysis.ngram; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.AttributeFactory; + +/** + * Tokenizes the input from an edge into n-grams of given size(s), using pre-4.4 behavior. + * + * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenizer}. 
+ */ +@Deprecated +public class Lucene43EdgeNGramTokenizer extends Lucene43NGramTokenizer { + public static final int DEFAULT_MAX_GRAM_SIZE = 1; + public static final int DEFAULT_MIN_GRAM_SIZE = 1; + + /** + * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range + * + * @param minGram the smallest n-gram to generate + * @param maxGram the largest n-gram to generate + */ + public Lucene43EdgeNGramTokenizer(int minGram, int maxGram) { + super(minGram, maxGram); + } + + /** + * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range + * + * @param factory {@link org.apache.lucene.util.AttributeFactory} to use + * @param minGram the smallest n-gram to generate + * @param maxGram the largest n-gram to generate + */ + public Lucene43EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) { + super(factory, minGram, maxGram); + } + +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java new file mode 100644 index 00000000000..1205fb34fb2 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java @@ -0,0 +1,150 @@ +package org.apache.lucene.analysis.ngram; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.util.CharacterUtils; + +import java.io.IOException; + +/** + * Tokenizes the input into n-grams of the given size(s), matching Lucene 4.3 and before behavior. + * + * @deprecated Use {@link org.apache.lucene.analysis.ngram.NGramTokenFilter} instead. 
+ */ +@Deprecated +public final class Lucene43NGramTokenFilter extends TokenFilter { + public static final int DEFAULT_MIN_NGRAM_SIZE = 1; + public static final int DEFAULT_MAX_NGRAM_SIZE = 2; + + private final int minGram, maxGram; + + private char[] curTermBuffer; + private int curTermLength; + private int curCodePointCount; + private int curGramSize; + private int curPos; + private int curPosInc, curPosLen; + private int tokStart; + private int tokEnd; + private boolean hasIllegalOffsets; // only if the length changed before this filter + + private final CharacterUtils charUtils; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncAtt; + private final PositionLengthAttribute posLenAtt; + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + /** + * Creates Lucene43NGramTokenFilter with given min and max n-grams. + * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized + * @param minGram the smallest n-gram to generate + * @param maxGram the largest n-gram to generate + */ + public Lucene43NGramTokenFilter(TokenStream input, int minGram, int maxGram) { + super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE)); + this.charUtils = CharacterUtils.getJava4Instance(); + if (minGram < 1) { + throw new IllegalArgumentException("minGram must be greater than zero"); + } + if (minGram > maxGram) { + throw new IllegalArgumentException("minGram must not be greater than maxGram"); + } + this.minGram = minGram; + this.maxGram = maxGram; + + posIncAtt = new PositionIncrementAttribute() { + @Override + public void setPositionIncrement(int positionIncrement) {} + @Override + public int getPositionIncrement() { + return 0; + } + }; + posLenAtt = new PositionLengthAttribute() { + @Override + public void setPositionLength(int positionLength) {} + @Override + public int getPositionLength() { + return 0; + } + }; + } + + /** + * Creates NGramTokenFilter with default min and max n-grams. + * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized + */ + public Lucene43NGramTokenFilter(TokenStream input) { + this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE); + } + + /** Returns the next token in the stream, or null at EOS. */ + @Override + public final boolean incrementToken() throws IOException { + while (true) { + if (curTermBuffer == null) { + if (!input.incrementToken()) { + return false; + } else { + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); + curCodePointCount = charUtils.codePointCount(termAtt); + curGramSize = minGram; + curPos = 0; + curPosInc = posIncAtt.getPositionIncrement(); + curPosLen = posLenAtt.getPositionLength(); + tokStart = offsetAtt.startOffset(); + tokEnd = offsetAtt.endOffset(); + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. 
+ hasIllegalOffsets = (tokStart + curTermLength) != tokEnd; + } + } + + while (curGramSize <= maxGram) { + while (curPos+curGramSize <= curTermLength) { // while there is input + clearAttributes(); + termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); + if (hasIllegalOffsets) { + offsetAtt.setOffset(tokStart, tokEnd); + } else { + offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); + } + curPos++; + return true; + } + curGramSize++; // increase n-gram size + curPos = 0; + } + curTermBuffer = null; + } + } + + @Override + public void reset() throws IOException { + super.reset(); + curTermBuffer = null; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java index fa9fcb0caec..8cde3e40c6b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java @@ -29,7 +29,7 @@ import org.apache.lucene.util.AttributeFactory; * Old broken version of {@link NGramTokenizer}. */ @Deprecated -public final class Lucene43NGramTokenizer extends Tokenizer { +public class Lucene43NGramTokenizer extends Tokenizer { public static final int DEFAULT_MIN_NGRAM_SIZE = 1; public static final int DEFAULT_MAX_NGRAM_SIZE = 2; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java index 60398bdf4b2..70e802b5bc1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java @@ -18,8 +18,11 @@ package org.apache.lucene.analysis.ngram; */ import java.util.Map; + +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.util.Version; /** * Factory for {@link NGramTokenFilter}. @@ -46,7 +49,10 @@ public class NGramFilterFactory extends TokenFilterFactory { } @Override - public NGramTokenFilter create(TokenStream input) { - return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize); + public TokenFilter create(TokenStream input) { + if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { + return new NGramTokenFilter(input, minGramSize, maxGramSize); + } + return new Lucene43NGramTokenFilter(input, minGramSize, maxGramSize); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java index ba87146a44f..83b19e6e561 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java @@ -27,21 +27,18 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; /** * Tokenizes the input into n-grams of the given size(s). - * - *

You must specify the required {@link Version} compatibility when - * creating a {@link NGramTokenFilter}. As of Lucene 4.4, this token filters: + * As of Lucene 4.4, this token filter:
 * handles supplementary characters correctly,
 * emits all n-grams for the same token at the same position,
 * does not modify offsets,
 * sorts n-grams by their offset in the original token first, then * increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc", * "c").
- * You can make this filter use the old behavior by providing a version < - * {@link Version#LUCENE_4_4} in the constructor but this is not recommended as + * You can make this filter use the old behavior by using + * {@link org.apache.lucene.analysis.ngram.Lucene43NGramTokenFilter} but this is not recommended as * it will lead to broken {@link TokenStream}s that will cause highlighting * bugs. *
    If you were using this {@link TokenFilter} to perform partial highlighting, @@ -65,7 +62,6 @@ public final class NGramTokenFilter extends TokenFilter { private int tokEnd; private boolean hasIllegalOffsets; // only if the length changed before this filter - private final Version version; private final CharacterUtils charUtils; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAtt; @@ -74,18 +70,13 @@ public final class NGramTokenFilter extends TokenFilter { /** * Creates NGramTokenFilter with given min and max n-grams. - * @param version Lucene version to enable correct position increments. - * See above for details. * @param input {@link TokenStream} holding the input to be tokenized * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) { - super(new CodepointCountFilter(version, input, minGram, Integer.MAX_VALUE)); - this.version = version; - this.charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? CharacterUtils.getInstance(version) - : CharacterUtils.getJava4Instance(); + public NGramTokenFilter(TokenStream input, int minGram, int maxGram) { + super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE)); + this.charUtils = CharacterUtils.getInstance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } @@ -94,37 +85,17 @@ public final class NGramTokenFilter extends TokenFilter { } this.minGram = minGram; this.maxGram = maxGram; - if (version.onOrAfter(Version.LUCENE_4_4)) { - posIncAtt = addAttribute(PositionIncrementAttribute.class); - posLenAtt = addAttribute(PositionLengthAttribute.class); - } else { - posIncAtt = new PositionIncrementAttribute() { - @Override - public void setPositionIncrement(int positionIncrement) {} - @Override - public int getPositionIncrement() { - return 0; - } - }; - posLenAtt = new PositionLengthAttribute() { - @Override - public void setPositionLength(int positionLength) {} - @Override - public int getPositionLength() { - return 0; - } - }; - } + + posIncAtt = addAttribute(PositionIncrementAttribute.class); + posLenAtt = addAttribute(PositionLengthAttribute.class); } /** * Creates NGramTokenFilter with default min and max n-grams. - * @param version Lucene version to enable correct position increments. - * See above for details. * @param input {@link TokenStream} holding the input to be tokenized */ - public NGramTokenFilter(Version version, TokenStream input) { - this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE); + public NGramTokenFilter(TokenStream input) { + this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE); } /** Returns the next token in the stream, or null at EOS. 
*/ @@ -149,39 +120,22 @@ public final class NGramTokenFilter extends TokenFilter { hasIllegalOffsets = (tokStart + curTermLength) != tokEnd; } } - if (version.onOrAfter(Version.LUCENE_4_4)) { - if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) { - ++curPos; - curGramSize = minGram; - } - if ((curPos + curGramSize) <= curCodePointCount) { - clearAttributes(); - final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos); - final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize); - termAtt.copyBuffer(curTermBuffer, start, end - start); - posIncAtt.setPositionIncrement(curPosInc); - curPosInc = 0; - posLenAtt.setPositionLength(curPosLen); - offsetAtt.setOffset(tokStart, tokEnd); - curGramSize++; - return true; - } - } else { - while (curGramSize <= maxGram) { - while (curPos+curGramSize <= curTermLength) { // while there is input - clearAttributes(); - termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); - if (hasIllegalOffsets) { - offsetAtt.setOffset(tokStart, tokEnd); - } else { - offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); - } - curPos++; - return true; - } - curGramSize++; // increase n-gram size - curPos = 0; - } + + if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) { + ++curPos; + curGramSize = minGram; + } + if ((curPos + curGramSize) <= curCodePointCount) { + clearAttributes(); + final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos); + final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize); + termAtt.copyBuffer(curTermBuffer, start, end - start); + posIncAtt.setPositionIncrement(curPosInc); + curPosInc = 0; + posLenAtt.setPositionLength(curPosLen); + offsetAtt.setOffset(tokStart, tokEnd); + curGramSize++; + return true; } curTermBuffer = null; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java index 72c943b1ef9..177e46733fe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ngram; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -27,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.util.CharacterUtils; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.Version; /** * Tokenizes the input into n-grams of the given size(s). @@ -78,51 +76,43 @@ public class NGramTokenizer extends Tokenizer { private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - NGramTokenizer(Version version, int minGram, int maxGram, boolean edgesOnly) { - init(version, minGram, maxGram, edgesOnly); + NGramTokenizer(int minGram, int maxGram, boolean edgesOnly) { + init(minGram, maxGram, edgesOnly); } /** * Creates NGramTokenizer with given min and max n-grams. 
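The ordering the NGramTokenFilter javadoc above promises (by offset in the original token first, then by increasing length) also holds for the version-free NGramTokenizer whose constructors are rewritten here. A quick hypothetical check (input and gram sizes illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.ngram.NGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class NGramOrderSketch {
      static void print() throws IOException {
        NGramTokenizer tok = new NGramTokenizer(2, 3);              // bigrams and trigrams
        CharTermAttribute term = tok.getAttribute(CharTermAttribute.class);
        tok.setReader(new StringReader("abc"));
        tok.reset();
        while (tok.incrementToken()) {
          System.out.println(term);                                 // ab, abc, bc
        }
        tok.end();
        tok.close();
      }
    }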
- * @param version the lucene compatibility version * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public NGramTokenizer(Version version, int minGram, int maxGram) { - this(version, minGram, maxGram, false); + public NGramTokenizer(int minGram, int maxGram) { + this(minGram, maxGram, false); } - NGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram, boolean edgesOnly) { + NGramTokenizer(AttributeFactory factory, int minGram, int maxGram, boolean edgesOnly) { super(factory); - init(version, minGram, maxGram, edgesOnly); + init(minGram, maxGram, edgesOnly); } /** * Creates NGramTokenizer with given min and max n-grams. - * @param version the lucene compatibility version * @param factory {@link org.apache.lucene.util.AttributeFactory} to use * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ - public NGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram) { - this(version, factory, minGram, maxGram, false); + public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram) { + this(factory, minGram, maxGram, false); } /** * Creates NGramTokenizer with default min and max n-grams. - * @param version the lucene compatibility version */ - public NGramTokenizer(Version version) { - this(version, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE); + public NGramTokenizer() { + this(DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE); } - private void init(Version version, int minGram, int maxGram, boolean edgesOnly) { - if (!edgesOnly && !version.onOrAfter(Version.LUCENE_4_4)) { - throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer"); - } - charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? 
CharacterUtils.getInstance(version) - : CharacterUtils.getJava4Instance(); + private void init(int minGram, int maxGram, boolean edgesOnly) { + charUtils = CharacterUtils.getInstance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java index 7aa4a502cdb..de9a010db58 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java @@ -53,7 +53,7 @@ public class NGramTokenizerFactory extends TokenizerFactory { @Override public Tokenizer create(AttributeFactory factory) { if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) { - return new NGramTokenizer(luceneMatchVersion, factory, minGramSize, maxGramSize); + return new NGramTokenizer(factory, minGramSize, maxGramSize); } else { return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java index 1f29184429d..e3b2389f542 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java @@ -28,13 +28,11 @@ import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -50,10 +48,8 @@ import java.nio.charset.StandardCharsets; * A default set of stopwords is used unless an alternative list is specified, but the * exclusion list is empty by default. *

- *
- * <p><b>NOTE</b>: This class uses the same {@link Version}
- * dependent settings as {@link StandardAnalyzer}.</p>

    */ +// TODO: extend StopwordAnalyzerBase public final class DutchAnalyzer extends Analyzer { /** File containing default Dutch stopwords. */ @@ -73,14 +69,14 @@ public final class DutchAnalyzer extends Analyzer { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) throw new RuntimeException("Unable to load default stopword set"); } - DEFAULT_STEM_DICT = new CharArrayMap<>(Version.LUCENE_CURRENT, 4, false); + DEFAULT_STEM_DICT = new CharArrayMap<>(4, false); DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet DEFAULT_STEM_DICT.put("ei", "eier"); @@ -100,29 +96,27 @@ public final class DutchAnalyzer extends Analyzer { private CharArraySet excltable = CharArraySet.EMPTY_SET; private final StemmerOverrideMap stemdict; - private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}) * and a few default entries for the stem exclusion table. * */ - public DutchAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){ - this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer(CharArraySet stopwords){ + this(stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){ - this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable){ + this(stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap stemOverrideDict) { - this.matchVersion = matchVersion; - this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords)); - this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); + public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap stemOverrideDict) { + this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords)); + this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable)); if (stemOverrideDict.isEmpty()) { this.stemdict = null; } else { @@ -154,10 +148,10 @@ public final class DutchAnalyzer extends Analyzer { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stoptable); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stoptable); if (!excltable.isEmpty()) 
result = new SetKeywordMarkerFilter(result, excltable); if (stemdict != null) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java index ffe519947d0..0dd81255964 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.NorwegianStemmer; /** @@ -64,7 +63,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -76,18 +75,17 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public NorwegianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public NorwegianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public NorwegianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -95,14 +93,12 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
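The per-language analyzers in the rest of this patch all follow the pattern visible here: each constructor loses its leading Version parameter, and CharArraySet construction and copying no longer take a Version either. A sketch of caller code after the change (the Norwegian stop word and exclusion entries are made up for illustration):

import java.util.Arrays;

import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

public class NorwegianUsageSketch {
  public static void main(String[] args) {
    // Before: new CharArraySet(Version.LUCENE_4_9, Arrays.asList(...), true).
    CharArraySet stopwords = new CharArraySet(Arrays.asList("og", "i"), true);
    CharArraySet stemExclusions = new CharArraySet(Arrays.asList("fjord"), true);
    // Before: new NorwegianAnalyzer(Version.LUCENE_4_9, stopwords, stemExclusions).
    NorwegianAnalyzer analyzer = new NorwegianAnalyzer(stopwords, stemExclusions);
    System.out.println(analyzer.getStopwordSet().size()); // 2
    analyzer.close();
  }
}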
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -119,10 +115,10 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new NorwegianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java index 3bceb5c6ab3..fde61d6fa8c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Portuguese. @@ -63,7 +62,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,18 +74,17 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public PortugueseAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public PortugueseAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public PortugueseAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -94,14 +92,12 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public PortugueseAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -118,10 +114,10 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PortugueseLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java index f8ad153cfb4..54ecdff15f2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java @@ -31,7 +31,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; import static org.apache.lucene.analysis.util.StemmerUtil.*; @@ -135,8 +134,7 @@ public abstract class RSLPStemmerBase { if (!exceptions[i].endsWith(suffix)) throw new RuntimeException("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'"); } - this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, - Arrays.asList(exceptions), false); + this.exceptions = new CharArraySet(Arrays.asList(exceptions), false); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java index 8a4b8aa52ad..995ae2db893 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java @@ -31,7 +31,6 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; -import org.apache.lucene.util.Version; /** * An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection @@ -50,23 +49,20 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { //The default maximum percentage (40%) of index documents which //can contain a term, after which the term is considered to be a stop word. 
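For the QueryAutoStopWordAnalyzer constructors below, caller code shrinks the same way. A sketch under the new signatures (directory setup is illustrative, and the reader is deliberately left open for brevity):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;

public class AutoStopUsageSketch {
  // Wraps StandardAnalyzer so that any term occurring in more than 40% of
  // the documents in dir (defaultMaxDocFreqPercent) is dropped at query time.
  static Analyzer wrap(Directory dir) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    // Before: new QueryAutoStopWordAnalyzer(Version.LUCENE_4_9, delegate, reader).
    return new QueryAutoStopWordAnalyzer(new StandardAnalyzer(), reader);
  }
}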
public static final float defaultMaxDocFreqPercent = 0.4f; - private final Version matchVersion; /** * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all * indexed fields from terms with a document frequency percentage greater than * {@link #defaultMaxDocFreqPercent} * - * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( - Version matchVersion, Analyzer delegate, IndexReader indexReader) throws IOException { - this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent); + this(delegate, indexReader, defaultMaxDocFreqPercent); } /** @@ -74,18 +70,16 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * indexed fields from terms with a document frequency greater than the given * maxDocFreq * - * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param maxDocFreq Document frequency terms should be above in order to be stopwords * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( - Version matchVersion, Analyzer delegate, IndexReader indexReader, int maxDocFreq) throws IOException { - this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq); + this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq); } /** @@ -93,7 +87,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * indexed fields from terms with a document frequency percentage greater than * the given maxPercentDocs * - * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which @@ -101,11 +94,10 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( - Version matchVersion, Analyzer delegate, IndexReader indexReader, float maxPercentDocs) throws IOException { - this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs); + this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs); } /** @@ -113,7 +105,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * given selection of fields from terms with a document frequency percentage * greater than the given maxPercentDocs * - * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param fields Selection of fields to calculate stopwords for @@ -122,12 +113,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( - Version matchVersion, Analyzer delegate, IndexReader indexReader, Collection fields, float maxPercentDocs) throws IOException { - this(matchVersion, delegate, indexReader, fields, (int) (indexReader.numDocs() * 
maxPercentDocs)); + this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs)); } /** @@ -135,7 +125,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * given selection of fields from terms with a document frequency greater than * the given maxDocFreq * - * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param fields Selection of fields to calculate stopwords for @@ -143,13 +132,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( - Version matchVersion, Analyzer delegate, IndexReader indexReader, Collection fields, int maxDocFreq) throws IOException { super(delegate.getReuseStrategy()); - this.matchVersion = matchVersion; this.delegate = delegate; for (String field : fields) { @@ -181,8 +168,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { if (stopWords == null) { return components; } - StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), - new CharArraySet(matchVersion, stopWords, false)); + StopFilter stopFilter = new StopFilter(components.getTokenStream(), + new CharArraySet(stopWords, false)); return new TokenStreamComponents(components.getTokenizer(), stopFilter); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java index e729786cfe7..c9dee414442 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.reverse; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.Version; import java.io.IOException; @@ -36,7 +35,6 @@ public final class ReverseStringFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final char marker; - private final Version matchVersion; private static final char NOMARKER = '\uFFFF'; /** @@ -66,11 +64,10 @@ public final class ReverseStringFilter extends TokenFilter { * The reversed tokens will not be marked. *

    * - * @param matchVersion Lucene compatibility version * @param in {@link TokenStream} to filter */ - public ReverseStringFilter(Version matchVersion, TokenStream in) { - this(matchVersion, in, NOMARKER); + public ReverseStringFilter(TokenStream in) { + this(in, NOMARKER); } /** @@ -81,13 +78,11 @@ public final class ReverseStringFilter extends TokenFilter { * character. *

    * - * @param matchVersion compatibility version * @param in {@link TokenStream} to filter * @param marker A character used to mark reversed tokens */ - public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) { + public ReverseStringFilter(TokenStream in, char marker) { super(in); - this.matchVersion = matchVersion; this.marker = marker; } @@ -100,7 +95,7 @@ public final class ReverseStringFilter extends TokenFilter { termAtt.resizeBuffer(len); termAtt.buffer()[len - 1] = marker; } - reverse( matchVersion, termAtt.buffer(), 0, len ); + reverse( termAtt.buffer(), 0, len ); termAtt.setLength(len); return true; } else { @@ -111,48 +106,43 @@ public final class ReverseStringFilter extends TokenFilter { /** * Reverses the given input string * - * @param matchVersion compatibility version * @param input the string to reverse * @return the given input string in reversed order */ - public static String reverse( Version matchVersion, final String input ){ + public static String reverse(final String input ){ final char[] charInput = input.toCharArray(); - reverse( matchVersion, charInput, 0, charInput.length ); + reverse( charInput, 0, charInput.length ); return new String( charInput ); } /** * Reverses the given input buffer in-place - * @param matchVersion compatibility version * @param buffer the input char array to reverse */ - public static void reverse(Version matchVersion, final char[] buffer) { - reverse(matchVersion, buffer, 0, buffer.length); + public static void reverse(final char[] buffer) { + reverse(buffer, 0, buffer.length); } /** * Partially reverses the given input buffer in-place from offset 0 * up to the given length. - * @param matchVersion compatibility version * @param buffer the input char array to reverse * @param len the length in the buffer up to where the * buffer should be reversed */ - public static void reverse(Version matchVersion, final char[] buffer, - final int len) { - reverse( matchVersion, buffer, 0, len ); + public static void reverse(final char[] buffer, final int len) { + reverse( buffer, 0, len ); } /** * Partially reverses the given input buffer in-place from the given offset * up to the given length. 
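The static reverse() helpers keep their behavior; only the Version parameter disappears. A quick sketch:

import org.apache.lucene.analysis.reverse.ReverseStringFilter;

public class ReverseSketch {
  public static void main(String[] args) {
    // Before: ReverseStringFilter.reverse(Version.LUCENE_4_9, "lucene").
    System.out.println(ReverseStringFilter.reverse("lucene")); // enecul
    char[] buf = "abcdef".toCharArray();
    ReverseStringFilter.reverse(buf, 0, 3); // reverse only the first three chars
    System.out.println(new String(buf));    // cbadef
  }
}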
- * @param matchVersion compatibility version * @param buffer the input char array to reverse * @param start the offset from where to reverse the buffer * @param len the length in the buffer up to where the * buffer should be reversed */ - public static void reverse(Version matchVersion, final char[] buffer, + public static void reverse(final char[] buffer, final int start, final int len) { /* modified version of Apache Harmony AbstractStringBuilder reverse0() */ if (len < 2) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java index f25831ad198..33cfc97fb5d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java @@ -40,7 +40,6 @@ public class ReverseStringFilterFactory extends TokenFilterFactory { /** Creates a new ReverseStringFilterFactory */ public ReverseStringFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -48,7 +47,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory { @Override public ReverseStringFilter create(TokenStream in) { - return new ReverseStringFilter(luceneMatchVersion,in); + return new ReverseStringFilter(in); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java index 22af94ec177..cca18c6ecc5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java @@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.RomanianStemmer; /** @@ -78,18 +77,17 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public RomanianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public RomanianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public RomanianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -97,14 +95,12 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -121,10 +117,10 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new RomanianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java index 69ab96fa679..7dd1406aebf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * {@link Analyzer} for Russian language. 
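Every rewritten createComponents in this patch reduces to the same Version-free chain, as in the Russian hunk below; sketched here as a standalone Analyzer (the stop word set choice is illustrative):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class ChainSketch extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // No Version threading anywhere: every stage uses current behavior.
    Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, StandardAnalyzer.STOP_WORDS_SET);
    return new TokenStreamComponents(source, result);
  }
}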
@@ -54,7 +53,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,34 +73,30 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { return DefaultSetHolder.DEFAULT_STOP_SET; } - public RussianAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public RussianAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set */ - public RussianAnalyzer(Version matchVersion, CharArraySet stopwords){ - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public RussianAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * - * @param matchVersion - * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet a set of words not to be stemmed */ - public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){ - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); + public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -117,10 +112,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if (!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java index 9f7cf319012..cd2e3353f38 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.shingle; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.util.Version; /** * A ShingleAnalyzerWrapper wraps a {@link ShingleFilter} around another {@link Analyzer}. @@ -101,15 +100,15 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper { /** * Wraps {@link StandardAnalyzer}. 
*/ - public ShingleAnalyzerWrapper(Version matchVersion) { - this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); + public ShingleAnalyzerWrapper() { + this(ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); } /** * Wraps {@link StandardAnalyzer}. */ - public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) { - this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize); + public ShingleAnalyzerWrapper(int minShingleSize, int maxShingleSize) { + this(new StandardAnalyzer(), minShingleSize, maxShingleSize); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java index f7927161726..9663bfacaed 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java @@ -17,16 +17,14 @@ package org.apache.lucene.analysis.standard; * limitations under the License. */ -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; -import org.apache.lucene.util.Version; -import java.io.File; import java.io.IOException; import java.io.Reader; @@ -34,18 +32,6 @@ import java.io.Reader; * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link * LowerCaseFilter} and {@link StopFilter}, using a list of * English stop words. - * - * - *

- * <p>You must specify the required {@link Version}
- * compatibility when creating ClassicAnalyzer:</p>

    * * ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation, @@ -63,29 +49,23 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. - * @param matchVersion Lucene version to match See {@link - * above} * @param stopWords stop words */ - public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) { - super(matchVersion, stopWords); + public ClassicAnalyzer(CharArraySet stopWords) { + super(stopWords); } /** Builds an analyzer with the default stop words ({@link * #STOP_WORDS_SET}). - * @param matchVersion Lucene version to match See {@link - * above} */ - public ClassicAnalyzer(Version matchVersion) { - this(matchVersion, STOP_WORDS_SET); + public ClassicAnalyzer() { + this(STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader, Version) - * @param matchVersion Lucene version to match See {@link - * above} + * @see WordlistLoader#getWordSet(Reader) * @param stopwords Reader to read stop words from */ - public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException { - this(matchVersion, loadStopwordSet(stopwords, matchVersion)); + public ClassicAnalyzer(Reader stopwords) throws IOException { + this(loadStopwordSet(stopwords)); } /** @@ -107,11 +87,11 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final ClassicTokenizer src = new ClassicTokenizer(matchVersion); + final ClassicTokenizer src = new ClassicTokenizer(); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new ClassicFilter(src); - tok = new LowerCaseFilter(matchVersion, tok); - tok = new StopFilter(matchVersion, tok, stopwords); + tok = new LowerCaseFilter(tok); + tok = new StopFilter(tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java index eb085894788..118a41cb8b0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.standard; import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -26,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Version; /** A grammar-based tokenizer constructed with JFlex * @@ -102,19 +99,19 @@ public final class ClassicTokenizer extends Tokenizer { * * See http://issues.apache.org/jira/browse/LUCENE-1068 */ - public ClassicTokenizer(Version matchVersion) { - init(matchVersion); + public ClassicTokenizer() { + init(); } /** * Creates a new ClassicTokenizer with a given {@link 
org.apache.lucene.util.AttributeFactory} */ - public ClassicTokenizer(Version matchVersion, AttributeFactory factory) { + public ClassicTokenizer(AttributeFactory factory) { super(factory); - init(matchVersion); + init(); } - private void init(Version matchVersion) { + private void init() { this.scanner = new ClassicTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java index 3d73bd7d506..e4d901b82ba 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java @@ -37,7 +37,6 @@ public class ClassicTokenizerFactory extends TokenizerFactory { /** Creates a new ClassicTokenizerFactory */ public ClassicTokenizerFactory(Map args) { super(args); - assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -46,7 +45,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory { @Override public ClassicTokenizer create(AttributeFactory factory) { - ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory); + ClassicTokenizer tokenizer = new ClassicTokenizer(factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java index 00604afc17e..db9c4719dc0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java @@ -17,16 +17,14 @@ package org.apache.lucene.analysis.standard; * limitations under the License. */ -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; -import org.apache.lucene.util.Version; -import java.io.File; import java.io.IOException; import java.io.Reader; @@ -34,26 +32,9 @@ import java.io.Reader; * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link * LowerCaseFilter} and {@link StopFilter}, using a list of * English stop words. - * - * - *

- * <p>You must specify the required {@link Version}
- * compatibility when creating StandardAnalyzer:</p>

    */ public final class StandardAnalyzer extends StopwordAnalyzerBase { - + /** Default maximum allowed token length */ public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; @@ -64,29 +45,22 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. - * @param matchVersion Lucene version to match See {@link - * above} * @param stopWords stop words */ - public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) { - super(matchVersion, stopWords); + public StandardAnalyzer(CharArraySet stopWords) { + super(stopWords); } - /** Builds an analyzer with the default stop words ({@link - * #STOP_WORDS_SET}). - * @param matchVersion Lucene version to match See {@link - * above} + /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). */ - public StandardAnalyzer(Version matchVersion) { - this(matchVersion, STOP_WORDS_SET); + public StandardAnalyzer() { + this(STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader, Version) - * @param matchVersion Lucene version to match See {@link - * above} + * @see WordlistLoader#getWordSet(Reader) * @param stopwords Reader to read stop words from */ - public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException { - this(matchVersion, loadStopwordSet(stopwords, matchVersion)); + public StandardAnalyzer(Reader stopwords) throws IOException { + this(loadStopwordSet(stopwords)); } /** @@ -108,11 +82,11 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final StandardTokenizer src = new StandardTokenizer(matchVersion); + final StandardTokenizer src = new StandardTokenizer(); src.setMaxTokenLength(maxTokenLength); - TokenStream tok = new StandardFilter(matchVersion, src); - tok = new LowerCaseFilter(matchVersion, tok); - tok = new StopFilter(matchVersion, tok, stopwords); + TokenStream tok = new StandardFilter(src); + tok = new LowerCaseFilter(tok); + tok = new StopFilter(tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java index 809f9653dfe..ae5be75bc1e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java @@ -21,14 +21,13 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.util.Version; /** * Normalizes tokens extracted with {@link StandardTokenizer}. 
*/ public class StandardFilter extends TokenFilter { - public StandardFilter(Version matchVersion, TokenStream in) { + public StandardFilter(TokenStream in) { super(in); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java index f2dd7e0507f..f9102b00b44 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java @@ -38,7 +38,6 @@ public class StandardFilterFactory extends TokenFilterFactory { /** Creates a new StandardFilterFactory */ public StandardFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -46,6 +45,6 @@ public class StandardFilterFactory extends TokenFilterFactory { @Override public StandardFilter create(TokenStream input) { - return new StandardFilter(luceneMatchVersion, input); + return new StandardFilter(input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index 196c0ca1baf..bcfb6f6f267 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.standard; import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -26,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Version; /** A grammar-based tokenizer constructed with JFlex. *

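End to end, the Version-free StandardAnalyzer is used like this (field name and sample text are illustrative):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardSketch {
  public static void main(String[] args) throws IOException {
    // Before: new StandardAnalyzer(Version.LUCENE_4_9).
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
      TokenStream ts = analyzer.tokenStream("body", "The Quick Brown Fox");
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term); // quick, brown, fox ("the" is a stop word)
      }
      ts.end();
      ts.close();
    }
  }
}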
    @@ -116,19 +113,19 @@ public final class StandardTokenizer extends Tokenizer { * See http://issues.apache.org/jira/browse/LUCENE-1068 */ - public StandardTokenizer(Version matchVersion) { - init(matchVersion); + public StandardTokenizer() { + init(); } /** * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory} */ - public StandardTokenizer(Version matchVersion, AttributeFactory factory) { + public StandardTokenizer(AttributeFactory factory) { super(factory); - init(matchVersion); + init(); } - private void init(Version matchVersion) { + private void init() { this.scanner = new StandardTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java index bb5248b947b..87709aa8622 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java @@ -37,7 +37,6 @@ public class StandardTokenizerFactory extends TokenizerFactory { /** Creates a new StandardTokenizerFactory */ public StandardTokenizerFactory(Map args) { super(args); - assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -46,7 +45,7 @@ public class StandardTokenizerFactory extends TokenizerFactory { @Override public StandardTokenizer create(AttributeFactory factory) { - StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory); + StandardTokenizer tokenizer = new StandardTokenizer(factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java index 59cfbd16ec7..53ffac224b9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -34,15 +33,9 @@ import java.io.Reader; * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and * {@link org.apache.lucene.analysis.core.StopFilter}, using a list of * English stop words. - * - * - *

- * <p>You must specify the required {@link org.apache.lucene.util.Version}
- * compatibility when creating UAX29URLEmailAnalyzer</p>

    */ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { - + /** Default maximum allowed token length */ public static final int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -53,29 +46,23 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. - * @param matchVersion Lucene version to match See {@link - * above} * @param stopWords stop words */ - public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) { - super(matchVersion, stopWords); + public UAX29URLEmailAnalyzer(CharArraySet stopWords) { + super(stopWords); } /** Builds an analyzer with the default stop words ({@link * #STOP_WORDS_SET}). - * @param matchVersion Lucene version to match See {@link - * above} */ - public UAX29URLEmailAnalyzer(Version matchVersion) { - this(matchVersion, STOP_WORDS_SET); + public UAX29URLEmailAnalyzer() { + this(STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) - * @param matchVersion Lucene version to match See {@link - * above} + * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader) * @param stopwords Reader to read stop words from */ - public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) throws IOException { - this(matchVersion, loadStopwordSet(stopwords, matchVersion)); + public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException { + this(loadStopwordSet(stopwords)); } /** @@ -97,11 +84,11 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion); + final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(); src.setMaxTokenLength(maxTokenLength); - TokenStream tok = new StandardFilter(matchVersion, src); - tok = new LowerCaseFilter(matchVersion, tok); - tok = new StopFilter(matchVersion, tok, stopwords); + TokenStream tok = new StandardFilter(src); + tok = new LowerCaseFilter(tok); + tok = new StopFilter(tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index cd1218d8da7..522276b5b5f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -18,9 +18,6 @@ package org.apache.lucene.analysis.standard; */ import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -28,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import 
org.apache.lucene.util.Version; /** * This class implements Word Break rules from the Unicode Text Segmentation @@ -100,19 +95,19 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { * the input to the newly created JFlex scanner. */ - public UAX29URLEmailTokenizer(Version matchVersion) { - this.scanner = getScannerFor(matchVersion); + public UAX29URLEmailTokenizer() { + this.scanner = getScanner(); } /** * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory} */ - public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory) { + public UAX29URLEmailTokenizer(AttributeFactory factory) { super(factory); - this.scanner = getScannerFor(matchVersion); + this.scanner = getScanner(); } - private StandardTokenizerInterface getScannerFor(Version matchVersion) { + private StandardTokenizerInterface getScanner() { return new UAX29URLEmailTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java index e1218075aea..485b7d33a6e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java @@ -38,7 +38,6 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory { /** Creates a new UAX29URLEmailTokenizerFactory */ public UAX29URLEmailTokenizerFactory(Map args) { super(args); - assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -47,7 +46,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory { @Override public UAX29URLEmailTokenizer create(AttributeFactory factory) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java index a8878ea2139..e47e7f8c55c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.SwedishStemmer; /** @@ -64,7 +63,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -76,18 +75,17 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
*/ - public SwedishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public SwedishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public SwedishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -95,14 +93,12 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public SwedishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -119,10 +115,10 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new SwedishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index 7fcbf471c56..45bd3529015 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -134,8 +134,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT) : factory.create(); - TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; + Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create(); + TokenStream stream = ignoreCase ? 
new LowerCaseFilter(tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; @@ -202,7 +202,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException { Class clazz = loader.findClass(cname, Analyzer.class); try { - Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT); + Analyzer analyzer = clazz.getConstructor().newInstance(); if (analyzer instanceof ResourceLoaderAware) { ((ResourceLoaderAware) analyzer).inform(loader); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java index 12e27ad2aff..b08e7845e44 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -73,21 +72,18 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words. - * - * @param matchVersion lucene compatibility version */ - public ThaiAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public ThaiAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. 
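The loadAnalyzer change above tightens the contract for analyzers named in the factory configuration: they are now instantiated via clazz.getConstructor().newInstance(), so they must expose a public no-arg constructor. A sketch of a conforming analyzer (the class name is hypothetical):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;

    public final class MyAnalyzer extends Analyzer {
      public MyAnalyzer() {}  // resolved reflectively; a Version parameter would no longer match

      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();  // no-arg form introduced by this patch
        return new TokenStreamComponents(source);
      }
    }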
- * - * @param matchVersion lucene compatibility version + * * @param stopwords a stopword set */ - public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) { - super(matchVersion, stopwords); + public ThaiAnalyzer(CharArraySet stopwords) { + super(stopwords); } /** @@ -102,17 +98,17 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - if (matchVersion.onOrAfter(Version.LUCENE_4_8)) { + if (getVersion().onOrAfter(Version.LUCENE_4_8)) { final Tokenizer source = new ThaiTokenizer(); - TokenStream result = new LowerCaseFilter(matchVersion, source); - result = new StopFilter(matchVersion, result, stopwords); + TokenStream result = new LowerCaseFilter(source); + result = new StopFilter(result, stopwords); return new TokenStreamComponents(source, result); } else { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new ThaiWordFilter(matchVersion, result); - return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords)); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new ThaiWordFilter(result); + return new TokenStreamComponents(source, new StopFilter(result, stopwords)); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java index c387333ff50..7eb1eda5b5a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java @@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArrayIterator; import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Version; /** * {@link TokenFilter} that use {@link java.text.BreakIterator} to break each @@ -61,7 +60,7 @@ public final class ThaiWordFilter extends TokenFilter { private boolean hasIllegalOffsets = false; // only if the length changed before this filter /** Creates a new ThaiWordFilter with the specified match version. 
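Note how the branch on the matchVersion field above becomes a branch on the inherited getVersion(). A hedged sketch of how a caller would still reach the legacy pre-4.8 chain after this change, again assuming the Analyzer#setVersion companion setter:

    import org.apache.lucene.analysis.th.ThaiAnalyzer;
    import org.apache.lucene.util.Version;

    ThaiAnalyzer current = new ThaiAnalyzer();  // ThaiTokenizer-based chain (4.8+ path)
    ThaiAnalyzer legacy = new ThaiAnalyzer();
    legacy.setVersion(Version.LUCENE_4_3);      // selects the StandardTokenizer + ThaiWordFilter path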
*/ - public ThaiWordFilter(Version matchVersion, TokenStream input) { + public ThaiWordFilter(TokenStream input) { super(input); if (!DBBI_AVAILABLE) throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation"); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java index 699af7bf5a2..154187e2f6a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java @@ -41,7 +41,6 @@ public class ThaiWordFilterFactory extends TokenFilterFactory { /** Creates a new ThaiWordFilterFactory */ public ThaiWordFilterFactory(Map args) { super(args); - assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -49,7 +48,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory { @Override public ThaiWordFilter create(TokenStream input) { - return new ThaiWordFilter(luceneMatchVersion, input); + return new ThaiWordFilter(input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java index 0c8842bbfe5..60e08a0063b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java @@ -77,33 +77,30 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public TurkishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); + public TurkishAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. - * - * @param matchVersion lucene compatibility version + * * @param stopwords a stopword set */ - public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public TurkishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
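For the three-argument constructor shown next, a usage sketch with the now Version-free CharArraySet; the word lists here are illustrative only:

    import java.util.Arrays;
    import org.apache.lucene.analysis.tr.TurkishAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    CharArraySet stopwords = new CharArraySet(Arrays.asList("ve", "bir"), true);
    CharArraySet noStem = new CharArraySet(Arrays.asList("kitaplık"), false);
    TurkishAnalyzer a = new TurkishAnalyzer(stopwords, noStem);  // terms in noStem bypass the stemmer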
- * - * @param matchVersion lucene compatibility version + * * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + public TurkishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -120,14 +117,16 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - if(matchVersion.onOrAfter(Version.LUCENE_4_8)) + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + if (getVersion().onOrAfter(Version.LUCENE_4_8)) { result = new ApostropheFilter(result); + } result = new TurkishLowerCaseFilter(result); - result = new StopFilter(matchVersion, result, stopwords); - if(!stemExclusionSet.isEmpty()) + result = new StopFilter(result, stopwords); + if (!stemExclusionSet.isEmpty()) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); + } result = new SnowballFilter(result, new TurkishStemmer()); return new TokenStreamComponents(source, result); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java index 5234440d0ac..325e5dbad22 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java @@ -238,12 +238,10 @@ public abstract class AbstractAnalysisFactory { if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start - words = new CharArraySet(luceneMatchVersion, - files.size() * 10, ignoreCase); + words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List wlist = getLines(loader, file.trim()); - words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, - ignoreCase)); + words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; @@ -266,8 +264,7 @@ public abstract class AbstractAnalysisFactory { if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start - words = new CharArraySet(luceneMatchVersion, - files.size() * 10, ignoreCase); + words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { InputStream stream = null; Reader reader = null; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java index f867cf7ea88..7529d93d8df 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java @@ -25,8 +25,6 @@ import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; - /** * A simple class that stores key Strings as char[]'s 
in a @@ -36,19 +34,6 @@ import org.apache.lucene.util.Version; * etc. It is designed to be quick to retrieve items * by char[] keys without the necessity of converting * to a String first. - * - * - *
<p>You must specify the required {@link Version}
- * compatibility when creating {@link CharArrayMap}:
- * <ul>
- *   <li> As of 3.1, supplementary characters are
- *       properly lowercased.</li>
- * </ul>
    - * Before 3.1 supplementary characters could not be - * lowercased correctly due to the lack of Unicode 4 - * support in JDK 1.4. To use instances of - * {@link CharArrayMap} with the behavior before Lucene - * 3.1 pass a {@link Version} < 3.1 to the constructors. */ public class CharArrayMap extends AbstractMap { // private only because missing generics @@ -58,16 +43,12 @@ public class CharArrayMap extends AbstractMap { private final CharacterUtils charUtils; private boolean ignoreCase; private int count; - final Version matchVersion; // package private because used in CharArraySet char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator /** * Create map with enough capacity to hold startSize terms - * - * @param matchVersion - * compatibility match version see Version - * note above for details. + * * @param startSize * the initial capacity * @param ignoreCase @@ -75,31 +56,27 @@ public class CharArrayMap extends AbstractMap { * otherwise true. */ @SuppressWarnings("unchecked") - public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) { + public CharArrayMap(int startSize, boolean ignoreCase) { this.ignoreCase = ignoreCase; int size = INIT_SIZE; while(startSize + (startSize>>2) > size) size <<= 1; keys = new char[size][]; values = (V[]) new Object[size]; - this.charUtils = CharacterUtils.getInstance(matchVersion); - this.matchVersion = matchVersion; + this.charUtils = CharacterUtils.getInstance(); } /** * Creates a map from the mappings in another map. - * - * @param matchVersion - * compatibility match version see Version - * note above for details. + * * @param c * a map whose mappings to be copied * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArrayMap(Version matchVersion, Map c, boolean ignoreCase) { - this(matchVersion, c.size(), ignoreCase); + public CharArrayMap(Map c, boolean ignoreCase) { + this(c.size(), ignoreCase); putAll(c); } @@ -110,7 +87,6 @@ public class CharArrayMap extends AbstractMap { this.ignoreCase = toCopy.ignoreCase; this.count = toCopy.count; this.charUtils = toCopy.charUtils; - this.matchVersion = toCopy.matchVersion; } /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */ @@ -565,18 +541,7 @@ public class CharArrayMap extends AbstractMap { /** * Returns a copy of the given map as a {@link CharArrayMap}. If the given map * is a {@link CharArrayMap} the ignoreCase property will be preserved. - *
<p>
- * <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
- * the {@link Version} of the source map differs from its copy
- * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
- * The {@link #copy(Version, Map)} will preserve the {@link Version} of the
- * source map it is an instance of {@link CharArrayMap}.
- * </p>
    * - * @param matchVersion - * compatibility match version see Version - * note above for details. This argument will be ignored if the - * given map is a {@link CharArrayMap}. * @param map * a map to copy * @return a copy of the given map as a {@link CharArrayMap}. If the given map @@ -584,7 +549,7 @@ public class CharArrayMap extends AbstractMap { * matchVersion will be of the given map will be preserved. */ @SuppressWarnings("unchecked") - public static CharArrayMap copy(final Version matchVersion, final Map map) { + public static CharArrayMap copy(final Map map) { if(map == EMPTY_MAP) return emptyMap(); if(map instanceof CharArrayMap) { @@ -600,7 +565,7 @@ public class CharArrayMap extends AbstractMap { m.values = values; return m; } - return new CharArrayMap<>(matchVersion, map, false); + return new CharArrayMap<>(map, false); } /** Returns an empty, unmodifiable map. */ @@ -659,7 +624,7 @@ public class CharArrayMap extends AbstractMap { */ private static final class EmptyCharArrayMap extends UnmodifiableCharArrayMap { EmptyCharArrayMap() { - super(new CharArrayMap(Version.LUCENE_CURRENT, 0, false)); + super(new CharArrayMap(0, false)); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java index 109f2472867..4b9b264eef5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java @@ -22,9 +22,6 @@ import java.util.Collection; import java.util.Iterator; import java.util.Set; -import org.apache.lucene.util.Version; - - /** * A simple class that stores Strings as char[]'s in a * hash table. Note that this is not a general purpose @@ -34,18 +31,6 @@ import org.apache.lucene.util.Version; * is in the set without the necessity of converting it * to a String first. * - * - *
<p>You must specify the required {@link Version}
- * compatibility when creating {@link CharArraySet}:
- * <ul>
- *   <li> As of 3.1, supplementary characters are
- *       properly lowercased.</li>
- * </ul>
    - * Before 3.1 supplementary characters could not be - * lowercased correctly due to the lack of Unicode 4 - * support in JDK 1.4. To use instances of - * {@link CharArraySet} with the behavior before Lucene - * 3.1 pass a {@link Version} < 3.1 to the constructors. *
<p>
    * Please note: This class implements {@link java.util.Set Set} but * does not behave like it should in all cases. The generic type is @@ -64,33 +49,27 @@ public class CharArraySet extends AbstractSet { /** * Create set with enough capacity to hold startSize terms * - * @param matchVersion - * compatibility match version see Version - * note above for details. * @param startSize * the initial capacity * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) { - this(new CharArrayMap<>(matchVersion, startSize, ignoreCase)); + public CharArraySet(int startSize, boolean ignoreCase) { + this(new CharArrayMap<>(startSize, ignoreCase)); } /** * Creates a set from a Collection of objects. * - * @param matchVersion - * compatibility match version see Version - * note above for details. * @param c * a collection whose elements to be placed into the set * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) { - this(matchVersion, c.size(), ignoreCase); + public CharArraySet(Collection c, boolean ignoreCase) { + this(c.size(), ignoreCase); addAll(c); } @@ -172,32 +151,21 @@ public class CharArraySet extends AbstractSet { /** * Returns a copy of the given set as a {@link CharArraySet}. If the given set * is a {@link CharArraySet} the ignoreCase property will be preserved. - *
<p>
- * <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
- * the {@link Version} of the source set differs from its copy
- * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
- * The {@link #copy(Version, Set)} will preserve the {@link Version} of the
- * source set it is an instance of {@link CharArraySet}.
- * </p>
    * - * @param matchVersion - * compatibility match version see Version - * note above for details. This argument will be ignored if the - * given set is a {@link CharArraySet}. * @param set * a set to copy * @return a copy of the given set as a {@link CharArraySet}. If the given set * is a {@link CharArraySet} the ignoreCase property as well as the * matchVersion will be of the given set will be preserved. */ - public static CharArraySet copy(final Version matchVersion, final Set set) { + public static CharArraySet copy(final Set set) { if(set == EMPTY_SET) return EMPTY_SET; if(set instanceof CharArraySet) { final CharArraySet source = (CharArraySet) set; - return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map)); + return new CharArraySet(CharArrayMap.copy(source.map)); } - return new CharArraySet(matchVersion, set, false); + return new CharArraySet(set, false); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java index bfa40a02af1..fd290a6c00f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java @@ -18,15 +18,12 @@ package org.apache.lucene.analysis.util; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; import org.apache.lucene.analysis.util.CharacterUtils; -import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer; /** @@ -36,25 +33,18 @@ public abstract class CharTokenizer extends Tokenizer { /** * Creates a new {@link CharTokenizer} instance - * - * @param matchVersion - * Lucene version to match */ - public CharTokenizer(Version matchVersion) { - charUtils = CharacterUtils.getInstance(matchVersion); + public CharTokenizer() { } /** * Creates a new {@link CharTokenizer} instance * - * @param matchVersion - * Lucene version to match * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public CharTokenizer(Version matchVersion, AttributeFactory factory) { + public CharTokenizer(AttributeFactory factory) { super(factory); - charUtils = CharacterUtils.getInstance(matchVersion); } private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0; @@ -64,7 +54,7 @@ public abstract class CharTokenizer extends Tokenizer { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final CharacterUtils charUtils; + private final CharacterUtils charUtils = CharacterUtils.getInstance(); private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java index 022bfe1cfc2..b864ca20c51 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java @@ -34,29 +34,25 @@ public abstract class CharacterUtils { private 
static final Java5CharacterUtils JAVA_5 = new Java5CharacterUtils(); /** - * Returns a {@link CharacterUtils} implementation according to the given - * {@link Version} instance. - * - * @param matchVersion - * a version instance + * Returns a {@link CharacterUtils} implementation. * @return a {@link CharacterUtils} implementation according to the given * {@link Version} instance. */ - public static CharacterUtils getInstance(final Version matchVersion) { + public static CharacterUtils getInstance() { return JAVA_5; } - /** explicitly returns a version matching java 4 semantics */ + /** + * explicitly returns a version matching java 4 semantics + * @deprecated Only for n-gram backwards compat + */ + @Deprecated public static CharacterUtils getJava4Instance() { return JAVA_4; } /** * Returns the code point at the given index of the {@link CharSequence}. - * Depending on the {@link Version} passed to - * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior - * of {@link Character#codePointAt(char[], int)} as it would have been - * available on a Java 1.4 JVM or on a later virtual machine version. * * @param seq * a character sequence @@ -75,10 +71,6 @@ public abstract class CharacterUtils { /** * Returns the code point at the given index of the char array where only elements * with index less than the limit are used. - * Depending on the {@link Version} passed to - * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior - * of {@link Character#codePointAt(char[], int)} as it would have been - * available on a Java 1.4 JVM or on a later virtual machine version. * * @param chars * a character array @@ -188,10 +180,7 @@ public abstract class CharacterUtils { * the middle of a surrogate pair, even if there are remaining characters in * the {@link Reader}. *
<p>
    - * Depending on the {@link Version} passed to - * {@link CharacterUtils#getInstance(Version)} this method implements - * supplementary character awareness when filling the given buffer. For all - * {@link Version} > 3.0 {@link #fill(CharacterBuffer, Reader, int)} guarantees + * This method guarantees * that the given {@link CharacterBuffer} will never contain a high surrogate * character as the last element in the buffer unless it is the last available * character in the reader. In other words, high and low surrogate pairs will diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java index 1f5071a5382..f030475f8e2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.util.Version; /** * Abstract base class for TokenFilters that may remove tokens. @@ -32,18 +31,15 @@ import org.apache.lucene.util.Version; */ public abstract class FilteringTokenFilter extends TokenFilter { - protected final Version version; private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); private int skippedPositions; /** * Create a new {@link FilteringTokenFilter}. - * @param version the Lucene match version * @param in the {@link TokenStream} to consume */ - public FilteringTokenFilter(Version version, TokenStream in) { + public FilteringTokenFilter(TokenStream in) { super(in); - this.version = version; } /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */ diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java index b98c33588c5..ff1517e90d0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java @@ -24,7 +24,6 @@ import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * Base class for Analyzers that need to make use of stopword sets. @@ -37,8 +36,6 @@ public abstract class StopwordAnalyzerBase extends Analyzer { */ protected final CharArraySet stopwords; - protected final Version matchVersion; - /** * Returns the analyzer's stopword set or an empty set if the analyzer has no * stopwords @@ -53,26 +50,20 @@ public abstract class StopwordAnalyzerBase extends Analyzer { /** * Creates a new instance initialized with the given stopword set * - * @param version - * the Lucene version for cross version compatibility * @param stopwords * the analyzer's stopword set */ - protected StopwordAnalyzerBase(final Version version, final CharArraySet stopwords) { - matchVersion = version; + protected StopwordAnalyzerBase(final CharArraySet stopwords) { // analyzers should use char array set for stopwords! this.stopwords = stopwords == null ? 
CharArraySet.EMPTY_SET : CharArraySet - .unmodifiableSet(CharArraySet.copy(version, stopwords)); + .unmodifiableSet(CharArraySet.copy(stopwords)); } /** * Creates a new Analyzer with an empty stopword set - * - * @param version - * the Lucene version for cross version compatibility */ - protected StopwordAnalyzerBase(final Version version) { - this(version, null); + protected StopwordAnalyzerBase() { + this(null); } /** @@ -99,7 +90,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer { Reader reader = null; try { reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8); - return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase)); + return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase)); } finally { IOUtils.close(reader); } @@ -111,20 +102,16 @@ public abstract class StopwordAnalyzerBase extends Analyzer { * * @param stopwords * the stopwords file to load - * - * @param matchVersion - * the Lucene version for cross version compatibility * @return a CharArraySet containing the distinct stopwords from the given * file * @throws IOException * if loading the stopwords throws an {@link IOException} */ - protected static CharArraySet loadStopwordSet(File stopwords, - Version matchVersion) throws IOException { + protected static CharArraySet loadStopwordSet(File stopwords) throws IOException { Reader reader = null; try { reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8); - return WordlistLoader.getWordSet(reader, matchVersion); + return WordlistLoader.getWordSet(reader); } finally { IOUtils.close(reader); } @@ -136,17 +123,14 @@ public abstract class StopwordAnalyzerBase extends Analyzer { * @param stopwords * the stopwords reader to load * - * @param matchVersion - * the Lucene version for cross version compatibility * @return a CharArraySet containing the distinct stopwords from the given * reader * @throws IOException * if loading the stopwords throws an {@link IOException} */ - protected static CharArraySet loadStopwordSet(Reader stopwords, - Version matchVersion) throws IOException { + protected static CharArraySet loadStopwordSet(Reader stopwords) throws IOException { try { - return WordlistLoader.getWordSet(stopwords, matchVersion); + return WordlistLoader.getWordSet(stopwords); } finally { IOUtils.close(stopwords); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java index 8fec2c00d19..26f03b896f8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java @@ -26,7 +26,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; /** * Loader for text files that represent a list of stopwords. @@ -73,11 +72,10 @@ public class WordlistLoader { * Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
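The getWordSet signatures just below lose their Version parameter as well; a short sketch of the trimmed loading API, with the word list inlined only to keep the example self-contained:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.analysis.util.WordlistLoader;

    static CharArraySet loadInlineStopwords() throws IOException {
      // Before: WordlistLoader.getWordSet(reader, matchVersion)
      return WordlistLoader.getWordSet(new StringReader("an\nthe\nof"));
    }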
* * @param reader Reader containing the wordlist - * @param matchVersion the Lucene {@link Version} * @return A {@link CharArraySet} with the reader's words */ - public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException { - return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); + public static CharArraySet getWordSet(Reader reader) throws IOException { + return getWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false)); } /** @@ -88,11 +86,10 @@ public class WordlistLoader { * * @param reader Reader containing the wordlist * @param comment The string representing a comment. - * @param matchVersion the Lucene {@link Version} * @return A CharArraySet with the reader's words */ - public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException { - return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); + public static CharArraySet getWordSet(Reader reader, String comment) throws IOException { + return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false)); } /** @@ -170,11 +167,10 @@ public class WordlistLoader { *
</p>
    * * @param reader Reader containing a Snowball stopword list - * @param matchVersion the Lucene {@link Version} * @return A {@link CharArraySet} with the reader's words */ - public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException { - return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); + public static CharArraySet getSnowballWordSet(Reader reader) throws IOException { + return getSnowballWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false)); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java index 5371edf0059..0a59208a2de 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.util.Version; import java.text.Collator; -import java.io.Reader; /** *
<p>
    @@ -78,11 +77,10 @@ public final class CollationKeyAnalyzer extends Analyzer { /** * Create a new CollationKeyAnalyzer, using the specified collator. - * - * @param matchVersion compatibility version + * * @param collator CollationKey generator */ - public CollationKeyAnalyzer(Version matchVersion, Collator collator) { + public CollationKeyAnalyzer(Collator collator) { this.factory = new CollationAttributeFactory(collator); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java index 80b067e9052..49275c9328c 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java @@ -31,14 +31,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new ArabicAnalyzer(TEST_VERSION_CURRENT); + new ArabicAnalyzer(); } /** * Some simple tests showing some features of the analyzer, how some regular forms will conflate */ public void testBasicFeatures() throws Exception { - ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT); + ArabicAnalyzer a = new ArabicAnalyzer(); assertAnalyzesTo(a, "كبير", new String[] { "كبير" }); assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker @@ -59,7 +59,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Simple tests to show things are getting reset correctly, etc. */ public void testReusableTokenStream() throws Exception { - ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT); + ArabicAnalyzer a = new ArabicAnalyzer(); assertAnalyzesTo(a, "كبير", new String[] { "كبير" }); assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker } @@ -68,7 +68,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Non-arabic text gets treated in a similar way as SimpleAnalyzer. */ public void testEnglishInput() throws Exception { - assertAnalyzesTo(new ArabicAnalyzer(TEST_VERSION_CURRENT), "English text.", new String[] { + assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] { "english", "text" }); } @@ -76,26 +76,26 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Test that custom stopwords work, and are not case-sensitive. 
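The CollationKeyAnalyzer change above follows the same recipe as the tokenizers and analyzers: only the Version argument disappears. A usage sketch (the locale is chosen arbitrarily):

    import java.text.Collator;
    import java.util.Locale;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.collation.CollationKeyAnalyzer;

    Collator collator = Collator.getInstance(new Locale("sv", "SE"));
    Analyzer a = new CollationKeyAnalyzer(collator);  // the Version argument is gone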
*/ public void testCustomStopwords() throws Exception { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false); - ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set); + CharArraySet set = new CharArraySet(asSet("the", "and", "a"), false); + ArabicAnalyzer a = new ArabicAnalyzer(set); assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" }); } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false); - ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + CharArraySet set = new CharArraySet(asSet("ساهدهات"), false); + ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" }); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" }); - a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET); + a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ArabicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ArabicAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java index 851db2b7c9a..8768e290350 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java @@ -118,7 +118,7 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase { } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("ساهدهات"); MockTokenizer tokenStream = whitespaceMockTokenizer("ساهدهات"); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java index e1579dc6d01..dd50be254a9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test the Bulgarian analyzer @@ -33,22 +32,22 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { * This test fails with NPE when the stopwords file is missing in classpath */ public void testResourcesAvailable() { - new BulgarianAnalyzer(TEST_VERSION_CURRENT); + new BulgarianAnalyzer(); } public void testStopwords() throws IOException { - Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new BulgarianAnalyzer(); assertAnalyzesTo(a, 
"Как се казваш?", new String[] {"казваш"}); } public void testCustomStopwords() throws IOException { - Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "Как се казваш?", new String[] {"как", "се", "казваш"}); } public void testReusableTokenStream() throws IOException { - Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new BulgarianAnalyzer(); assertAnalyzesTo(a, "документи", new String[] {"документ"}); assertAnalyzesTo(a, "документ", new String[] {"документ"}); } @@ -57,7 +56,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { * Test some examples from the paper */ public void testBasicExamples() throws IOException { - Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new BulgarianAnalyzer(); assertAnalyzesTo(a, "енергийни кризи", new String[] {"енергийн", "криз"}); assertAnalyzesTo(a, "Атомната енергия", new String[] {"атомн", "енерг"}); @@ -68,14 +67,14 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("строеве"); - Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new BulgarianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new BulgarianAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java index e176afafed6..5054ff5e074 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.bg; */ import java.io.IOException; -import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; @@ -28,7 +27,6 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test the Bulgarian Stemmer @@ -39,7 +37,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * common (and some rare) plural pattern is listed. 
*/ public void testMasculineNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + BulgarianAnalyzer a = new BulgarianAnalyzer(); // -и pattern assertAnalyzesTo(a, "град", new String[] {"град"}); @@ -105,7 +103,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test showing how feminine noun forms conflate */ public void testFeminineNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + BulgarianAnalyzer a = new BulgarianAnalyzer(); assertAnalyzesTo(a, "вест", new String[] {"вест"}); assertAnalyzesTo(a, "вестта", new String[] {"вест"}); @@ -118,7 +116,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * plural pattern is listed */ public void testNeuterNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + BulgarianAnalyzer a = new BulgarianAnalyzer(); // -а pattern assertAnalyzesTo(a, "дърво", new String[] {"дърв"}); @@ -146,7 +144,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test showing how adjectival forms conflate */ public void testAdjectives() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + BulgarianAnalyzer a = new BulgarianAnalyzer(); assertAnalyzesTo(a, "красив", new String[] {"красив"}); assertAnalyzesTo(a, "красивия", new String[] {"красив"}); assertAnalyzesTo(a, "красивият", new String[] {"красив"}); @@ -162,7 +160,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test some exceptional rules, implemented as rewrites. */ public void testExceptions() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); + BulgarianAnalyzer a = new BulgarianAnalyzer(); // ци -> к assertAnalyzesTo(a, "собственик", new String[] {"собственик"}); @@ -217,7 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("строеве"); MockTokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenStream.setReader(new StringReader("строевете строеве")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java index 33c2075d87b..3307fbd1c20 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java @@ -130,7 +130,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - Analyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new BrazilianAnalyzer(); checkReuse(a, "boa", "boa"); checkReuse(a, "boainain", "boainain"); checkReuse(a, "boas", "boas"); @@ -138,15 +138,15 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } public void testStemExclusionTable() throws Exception { - BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, - CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("quintessência"), false)); + BrazilianAnalyzer a = new BrazilianAnalyzer( + CharArraySet.EMPTY_SET, new CharArraySet(asSet("quintessência"), false)); checkReuse(a, "quintessência", 
"quintessência"); // excluded words will be completely unchanged. } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("Brasília"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LowerCaseTokenizer(); tokenizer.setReader(new StringReader("Brasília Brasilia")); BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(tokenizer, set)); @@ -154,7 +154,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } private void check(final String input, final String expected) throws Exception { - checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected); + checkOneTerm(new BrazilianAnalyzer(), input, expected); } private void checkReuse(Analyzer a, String input, String expected) throws Exception { @@ -163,7 +163,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new BrazilianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new BrazilianAnalyzer(), 1000*RANDOM_MULTIPLIER); } public void testEmptyTerm() throws IOException { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java index 4d32666b9c1..bc14adc7897 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java @@ -27,12 +27,12 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new CatalanAnalyzer(TEST_VERSION_CURRENT); + new CatalanAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new CatalanAnalyzer(); // stemming checkOneTerm(a, "llengües", "llengu"); checkOneTerm(a, "llengua", "llengu"); @@ -42,22 +42,21 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { /** test use of elisionfilter */ public void testContractions() throws IOException { - Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new CatalanAnalyzer(); assertAnalyzesTo(a, "Diccionari de l'Institut d'Estudis Catalans", new String[] { "diccion", "inst", "estud", "catalan" }); } /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false); - Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT, - CatalanAnalyzer.getDefaultStopSet(), exclusionSet); + CharArraySet exclusionSet = new CharArraySet(asSet("llengües"), false); + Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "llengües", "llengües"); checkOneTerm(a, "llengua", "llengu"); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CatalanAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CatalanAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java index a7346e7db9e..fc25c5496c0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java @@ -39,7 +39,7 @@ import org.apache.lucene.analysis.util.CharArraySet; * Most tests adopted from TestCJKTokenizer */ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { - private Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT); + private Analyzer analyzer = new CJKAnalyzer(); public void testJa1() throws IOException { assertAnalyzesTo(analyzer, "一二三四五六七八九十", @@ -209,7 +209,7 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new StandardTokenizer(); return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer)); } @@ -255,7 +255,7 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); TokenFilter filter = new FakeStandardTokenizer(tokenizer); - filter = new StopFilter(TEST_VERSION_CURRENT, filter, CharArraySet.EMPTY_SET); + filter = new StopFilter(filter, CharArraySet.EMPTY_SET); filter = new CJKBigramFilter(filter); return new TokenStreamComponents(tokenizer, filter); } @@ -271,13 +271,13 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CJKAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CJKAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new CJKAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } public void testEmptyTerm() throws IOException { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java index 600e369f4fa..08684009d8e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java @@ -29,7 +29,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer t = new StandardTokenizer(); return new TokenStreamComponents(t, new CJKBigramFilter(t)); } }; @@ -37,7 +37,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer unibiAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer t = new StandardTokenizer(); return new TokenStreamComponents(t, new CJKBigramFilter(t, 
0xff, true)); } @@ -67,7 +67,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer t = new StandardTokenizer(); return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN)); } }; @@ -85,7 +85,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer t = new StandardTokenizer(); return new TokenStreamComponents(t, new CJKBigramFilter(t, 0xff, false)); } @@ -119,7 +119,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer t = new StandardTokenizer(); return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java index e5faa279839..9a2c9d9969a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java @@ -32,35 +32,35 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase { * This test fails with NPE when the stopwords file is missing in classpath */ public void testResourcesAvailable() { - new SoraniAnalyzer(TEST_VERSION_CURRENT); + new SoraniAnalyzer(); } public void testStopwords() throws IOException { - Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new SoraniAnalyzer(); assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"پیاو"}); } public void testCustomStopwords() throws IOException { - Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"ئەم", "پیاو"}); } public void testReusableTokenStream() throws IOException { - Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new SoraniAnalyzer(); assertAnalyzesTo(a, "پیاوە", new String[] {"پیاو"}); assertAnalyzesTo(a, "پیاو", new String[] {"پیاو"}); } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("پیاوە"); - Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "پیاوە", new String[] { "پیاوە" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new SoraniAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SoraniAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java index 4a405fc4d08..ac2543d7bd5 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer; * Test the Sorani Stemmer. */ public class TestSoraniStemFilter extends BaseTokenStreamTestCase { - SoraniAnalyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); + SoraniAnalyzer a = new SoraniAnalyzer(); public void testIndefiniteSingular() throws Exception { checkOneTerm(a, "پیاوێک", "پیاو"); // -ek diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java index 43300eb8968..105f214a001 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java @@ -29,15 +29,15 @@ import org.apache.lucene.analysis.util.CharArraySet; * Tests CommonGrams(Query)Filter */ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { - private static final CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList( + private static final CharArraySet commonWords = new CharArraySet(Arrays.asList( "s", "a", "b", "c", "d", "the", "of" ), false); public void testReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer wt = new WhitespaceTokenizer(); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class); cgf.reset(); @@ -59,9 +59,9 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { public void testQueryReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer wt = new WhitespaceTokenizer(); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf); CharTermAttribute term = wt.addAttribute(CharTermAttribute.class); @@ -93,7 +93,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, + return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter( tokenizer, commonWords))); } }; @@ -163,8 +163,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT, - tokenizer, commonWords)); + return new TokenStreamComponents(tokenizer, new CommonGramsFilter(tokenizer, commonWords)); } }; @@ -252,7 +251,7 @@ public 
class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "How The s a brown s cow d like A B thing?"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + TokenFilter cgf = new CommonGramsFilter(wt, commonWords); assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"}); @@ -265,7 +264,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "dog the"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "dog_the" }); } @@ -277,7 +276,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the dog"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_dog" }); } @@ -289,7 +288,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the" }); } @@ -301,7 +300,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "monster"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "monster" }); } @@ -313,7 +312,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the of"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_of" }); } @@ -325,7 +324,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(t, commonWords); return new TokenStreamComponents(t, cgf); } }; @@ -337,7 +336,7 @@ public class CommonGramsFilterTest extends 
BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); - CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(t, commonWords); return new TokenStreamComponents(t, new CommonGramsQueryFilter(cgf)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index 6385ff74580..d547e634351 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -32,7 +32,6 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter; import org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Attribute; @@ -42,7 +41,7 @@ import org.xml.sax.InputSource; public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { private static CharArraySet makeDictionary(String... dictionary) { - return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true); + return new CharArraySet(Arrays.asList(dictionary), true); } public void testHyphenationCompoundWordsDA() throws Exception { @@ -52,7 +51,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter .getHyphenationTree(is); - HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, + HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter( whitespaceMockTokenizer("min veninde som er lidt af en læsehest"), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -72,7 +71,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { .getHyphenationTree(is); // the word basket will not be added due to the longest match option - HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, + HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter( whitespaceMockTokenizer("basketballkurv"), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -94,7 +93,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { .getHyphenationTree(is); HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter( - TEST_VERSION_CURRENT, + whitespaceMockTokenizer("basketballkurv"), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -106,7 +105,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { ); tf = new HyphenationCompoundWordTokenFilter( - TEST_VERSION_CURRENT, + whitespaceMockTokenizer("basketballkurv"), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -118,7 +117,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { ); tf = new HyphenationCompoundWordTokenFilter( - TEST_VERSION_CURRENT, + 
whitespaceMockTokenizer("basketballkurv"), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -137,7 +136,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad"); - DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, + DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter( whitespaceMockTokenizer( "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), dict); @@ -165,7 +164,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral"); - DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, + DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter( whitespaceMockTokenizer("Basfiolsfodralmakaregesäll"), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, @@ -182,7 +181,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("abcdef")); - DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, + DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter( tokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -202,7 +201,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("abcdefg")); - DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, + DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter( tokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, @@ -225,7 +224,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { MockTokenizer wsTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); wsTokenizer.setEnableChecks(false); // we will reset in a strange place wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz")); - DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, + DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter( wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, @@ -251,7 +250,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { tokenizer.setReader(new StringReader("abcdefg")); TokenStream stream = new MockRetainAttributeFilter(tokenizer); stream = new DictionaryCompoundWordTokenFilter( - TEST_VERSION_CURRENT, stream, dict, + stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false); @@ -323,7 +322,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - 
TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict); + TokenFilter filter = new DictionaryCompoundWordTokenFilter(tokenizer, dict); return new TokenStreamComponents(tokenizer, filter); } @@ -347,7 +346,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict)); + return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(tokenizer, dict)); } }; checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); @@ -359,7 +358,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator); + TokenFilter filter = new HyphenationCompoundWordTokenFilter(tokenizer, hyphenator); return new TokenStreamComponents(tokenizer, filter); } }; @@ -373,7 +372,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); - return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict)); + return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(tokenizer, dict)); } }; checkOneTerm(a, "", ""); @@ -385,7 +384,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); - TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator); + TokenFilter filter = new HyphenationCompoundWordTokenFilter(tokenizer, hyphenator); return new TokenStreamComponents(tokenizer, filter); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java index 5fc558296d6..24b9629e527 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java @@ -31,7 +31,7 @@ import org.apache.lucene.util.BytesRef; public class TestAnalyzers extends BaseTokenStreamTestCase { public void testSimple() throws Exception { - Analyzer a = new SimpleAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new SimpleAnalyzer(); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "foo", "bar" }); assertAnalyzesTo(a, "foo bar . FOO <> BAR", @@ -51,7 +51,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { } public void testNull() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new WhitespaceAnalyzer(); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "FOO", "BAR" }); assertAnalyzesTo(a, "foo bar . 
FOO <> BAR", @@ -71,7 +71,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { } public void testStop() throws Exception { - Analyzer a = new StopAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new StopAnalyzer(); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "foo", "bar" }); assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", @@ -94,12 +94,12 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testPayloadCopy() throws IOException { String s = "how now brown cow"; TokenStream ts; - ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + ts = new WhitespaceTokenizer(); ((Tokenizer)ts).setReader(new StringReader(s)); ts = new PayloadSetter(ts); verifyPayload(ts); - ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + ts = new WhitespaceTokenizer(); ((Tokenizer)ts).setReader(new StringReader(s)); ts = new PayloadSetter(ts); verifyPayload(ts); @@ -124,8 +124,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); - return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer)); + Tokenizer tokenizer = new WhitespaceTokenizer(); + return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer)); } } @@ -134,8 +134,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); - return new TokenStreamComponents(tokenizer, new UpperCaseFilter(TEST_VERSION_CURRENT, tokenizer)); + Tokenizer tokenizer = new WhitespaceTokenizer(); + return new TokenStreamComponents(tokenizer, new UpperCaseFilter(tokenizer)); } } @@ -190,10 +190,9 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testLowerCaseFilterLowSurrogateLeftover() throws IOException { // test if the limit of the termbuffer is correctly used with supplementary // chars - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader("BogustermBogusterm\udc16")); - LowerCaseFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, - tokenizer); + LowerCaseFilter filter = new LowerCaseFilter(tokenizer); assertTokenStreamContents(filter, new String[] {"bogustermbogusterm\udc16"}); filter.reset(); String highSurEndingUpper = "BogustermBoguster\ud801"; @@ -208,7 +207,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testLowerCaseTokenizer() throws IOException { StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); - LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "\ud801\udc44test" }); @@ -216,7 +215,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testWhitespaceTokenizer() throws IOException { StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "\ud801\udc1ctest" }); @@ -224,17 +223,17 @@ public class 
TestAnalyzers extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); - checkRandomData(random(), new SimpleAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); - checkRandomData(random(), new StopAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new WhitespaceAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SimpleAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new StopAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); - checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); - checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new WhitespaceAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new SimpleAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new StopAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java index 65bc8b2bbcc..97369afc856 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java @@ -47,7 +47,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; @SuppressCodecs("Direct") public class TestBugInSomething extends BaseTokenStreamTestCase { public void test() throws Exception { - final CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false); + final CharArraySet cas = new CharArraySet(3, false); cas.add("jjp"); cas.add("wlmwoknt"); cas.add("tcgyreo"); @@ -62,7 +62,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenFilter.ENGLISH_STOPSET, false, -65); - TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas); + TokenFilter f = new CommonGramsFilter(t, cas); return new TokenStreamComponents(t, f); } @@ -250,11 +250,11 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 2, 94); + Tokenizer tokenizer = new EdgeNGramTokenizer(2, 94); //TokenStream stream = new SopTokenFilter(tokenizer); TokenStream stream = new ShingleFilter(tokenizer, 5); //stream = new SopTokenFilter(stream); - stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83); + stream = new NGramTokenFilter(stream, 55, 83); //stream = new SopTokenFilter(stream); return new TokenStreamComponents(tokenizer, stream); } @@ -263,7 +263,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { } public void testCuriousWikipediaString() throws Exception { - final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>( + final CharArraySet protWords = new CharArraySet(new 
HashSet<>( Arrays.asList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false); final byte table[] = new byte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, @@ -278,7 +278,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new WikipediaTokenizer(); TokenStream stream = new SopTokenFilter(tokenizer); - stream = new WordDelimiterFilter(TEST_VERSION_CURRENT, stream, table, -50, protWords); + stream = new WordDelimiterFilter(stream, table, -50, protWords); stream = new SopTokenFilter(stream); return new TokenStreamComponents(tokenizer, stream); }
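Every hunk in these test files applies the same mechanical rewrite, so the resulting API is easiest to see consolidated in one place. A minimal sketch of a post-change analysis chain, using only constructor shapes that appear in the surrounding hunks (the class name and choice of stop set are illustrative, not part of the patch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.StopAnalyzer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    public class VersionlessChainExample {
      public static Analyzer newAnalyzer() {
        return new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            // Each stage is constructed without the old leading Version argument;
            // the components now always behave as the current trunk format.
            Tokenizer source = new StandardTokenizer();
            TokenStream sink = new LowerCaseFilter(source);
            sink = new StopFilter(sink, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            return new TokenStreamComponents(source, sink);
          }
        };
      }
    }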
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java index e1ae2d55eb5..70f60d108b0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java @@ -40,16 +40,16 @@ import java.util.Random; public class TestClassicAnalyzer extends BaseTokenStreamTestCase { - private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT); + private Analyzer a = new ClassicAnalyzer(); public void testMaxTermLength() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer sa = new ClassicAnalyzer(); sa.setMaxTokenLength(5); assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}); } public void testMaxTermLength2() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer sa = new ClassicAnalyzer(); assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"}); sa.setMaxTokenLength(5); @@ -113,7 +113,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { public void testLucene1140() throws Exception { try { - ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer analyzer = new ClassicAnalyzer(); assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" }); } catch (NullPointerException e) { fail("Should not throw an NPE and it did"); } @@ -123,7 +123,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { public void testDomainNames() throws Exception { // Current lucene should not show the bug - ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer a2 = new ClassicAnalyzer(); // domain names assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"}); @@ -137,7 +137,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { // 2.4 should not show the bug. But, alas, it's also obsolete, // so we check latest released (Robert's gonna break this on 4.0 soon :) ) - a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT); + a2 = new ClassicAnalyzer(); assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" }); } @@ -244,7 +244,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { } public void testJava14BWCompatibility() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer sa = new ClassicAnalyzer(); assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" }); } @@ -253,8 +253,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { */ public void testWickedLongTerm() throws IOException { RAMDirectory dir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer())); char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; Arrays.fill(chars, 'x'); @@ -300,7 +299,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { // maximum length term, and search on that term: doc = new Document(); doc.add(new TextField("content", bigTerm, Field.Store.NO)); - ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + ClassicAnalyzer sa = new ClassicAnalyzer(); sa.setMaxTokenLength(100000); writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)); writer.addDocument(doc); @@ -314,12 +313,12 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ClassicAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new ClassicAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } }
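One asymmetry in the TestClassicAnalyzer hunks above is worth noting: the analyzers lose their Version argument, but IndexWriterConfig still takes TEST_VERSION_CURRENT at this point in trunk. A short sketch of the resulting call shape, as it would sit inside one of these BaseTokenStreamTestCase tests (the directory choice is illustrative):

    // The analyzer no longer pins a Version ...
    ClassicAnalyzer analyzer = new ClassicAnalyzer();
    // ... but the index-level compatibility choice is still made explicitly,
    // in exactly one place, when the writer is configured.
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));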
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java index 8d97f33d7fe..63df6be7cae 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java @@ -69,7 +69,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -89,7 +89,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -107,7 +107,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -126,7 +126,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -144,7 +144,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -163,7 +163,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java index 6de3c4a0fee..b2d7b05f652 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java @@ -49,8 +49,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( - TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer())); Document doc = new Document(); doc.add(new StringField("partnum", "Q36", Field.Store.YES)); @@ -72,7 +71,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { /* public void testPerFieldAnalyzer() throws Exception { - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT)); + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer()); analyzer.addAnalyzer("partnum", new KeywordAnalyzer()); QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index f3972e84982..ff0356b3192 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -60,7 +60,7 @@ import 
org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.apache.lucene.analysis.cjk.CJKBigramFilter; import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter; -import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter; +import org.apache.lucene.analysis.compound.Lucene43HyphenationCompoundWordTokenFilter; import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.hunspell.Dictionary; @@ -385,7 +385,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { put(CharArraySet.class, new ArgProducer() { @Override public Object create(Random random) { int num = random.nextInt(10); - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean()); + CharArraySet set = new CharArraySet(num, random.nextBoolean()); for (int i = 0; i < num; i++) { // TODO: make nastier set.add(TestUtil.randomSimpleString(random)); @@ -429,7 +429,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { // TODO: make nastier try { InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm()); - HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); + HyphenationTree hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is); return hyphenator; } catch (Exception ex) { Rethrow.rethrow(ex); @@ -494,7 +494,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { put(CharArrayMap.class, new ArgProducer() { @Override public Object create(Random random) { int num = random.nextInt(10); - CharArrayMap<String,String> map = new CharArrayMap<>(TEST_VERSION_CURRENT, num, random.nextBoolean()); + CharArrayMap<String,String> map = new CharArrayMap<>(num, random.nextBoolean()); for (int i = 0; i < num; i++) { // TODO: make nastier map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); @@ -619,7 +619,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { args[i] = stream; } else if (paramType == CommonGramsFilter.class) { // TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly - args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, newRandomArg(random, CharArraySet.class)); + args[i] = new CommonGramsFilter(stream, newRandomArg(random, CharArraySet.class)); } else { args[i] = newRandomArg(random, paramType); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index bbb656c9794..a8bca2c8367 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.core; */ import java.io.IOException; -import java.io.Reader; import java.io.StringReader; import java.util.Arrays; import java.util.Random; @@ -30,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.util.Version; public class TestStandardAnalyzer extends BaseTokenStreamTestCase { @@ -41,7 +39,7 @@ public class TestStandardAnalyzer 
extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + StandardTokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(new StringReader(input)); BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -50,7 +48,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -282,13 +280,13 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new StandardAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new StandardAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new StandardAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } // Adds random graph after: @@ -298,7 +296,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory()); TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer); return new TokenStreamComponents(tokenizer, tokenStream); }
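The TestStopAnalyzer and TestStopFilter hunks that follow repeat the pattern for stop-word handling: CharArraySet, StopFilter.makeStopSet, and StopAnalyzer all drop the Version parameter. A minimal sketch of the new call shapes, written as it would appear inside one of these BaseTokenStreamTestCase tests (asSet is the test harness helper; the field name and sample text are illustrative):

    CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false);
    StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
    try (TokenStream stream = newStop.tokenStream("field", "this is a good test")) {
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        // "good" and "test" are filtered out; "this", "is", "a" survive,
        // because the custom set replaces the default English stop words.
        System.out.println(termAtt.toString());
      }
      stream.end();
    }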
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java index 9838fe1f8a2..de8b061f070 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java @@ -22,7 +22,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; import java.io.IOException; import java.util.Iterator; @@ -31,7 +30,7 @@ import java.util.HashSet; public class TestStopAnalyzer extends BaseTokenStreamTestCase { - private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT); + private StopAnalyzer stop = new StopAnalyzer(); private Set<Object> inValidTokens = new HashSet<>(); @Override @@ -59,8 +58,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { } public void testStopList() throws IOException { - CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); - StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); + CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false); + StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) { assertNotNull(stream); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); @@ -75,8 +74,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { } public void testStopListPositions() throws IOException { - CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); - StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); + CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false); + StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); String s = "This is a good test of the english stop analyzer with positions"; int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; try (TokenStream stream = newStop.tokenStream("test", s)) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java index 972f8731912..c21d7f5842d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java @@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.English; -import org.apache.lucene.util.Version; public class TestStopFilter extends BaseTokenStreamTestCase { @@ -37,20 +36,20 @@ public class TestStopFilter extends BaseTokenStreamTestCase { public void testExactCase() throws IOException { StringReader reader = new StringReader("Now is The Time"); - CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("is", "the", "Time"), false); + CharArraySet stopWords = new CharArraySet(asSet("is", "the", "Time"), false); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(reader); - TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, in, stopWords); + TokenStream stream = new StopFilter(in, stopWords); assertTokenStreamContents(stream, new String[] { "Now", "The" }); } public void testStopFilt() throws IOException { StringReader reader = new StringReader("Now is The Time"); String[] stopWords = new String[] { "is", "the", "Time" }; - CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); + CharArraySet stopSet = StopFilter.makeStopSet(stopWords); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(reader); - TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, in, stopSet); + TokenStream stream = new StopFilter(in, stopSet); assertTokenStreamContents(stream, new String[] { "Now", "The" }); } @@ -68,12 +67,12 @@ public class TestStopFilter extends BaseTokenStreamTestCase { log(sb.toString()); String stopWords[] = a.toArray(new String[0]); for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]); ... ArrayList<Integer> a0 = new ArrayList<>(); @@ -89,22 +88,22 @@ public class TestStopFilter extends BaseTokenStreamTestCase { for (int i=0; i ... Set<String> stopTypes = asSet("<NUM>"); - final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + final StandardTokenizer input = new StandardTokenizer(newAttributeFactory()); input.setReader(reader); - TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes); + TokenStream stream = new TypeTokenFilter(input, stopTypes); assertTokenStreamContents(stream, new 
String[]{"is", "palindrome", "while", "is", "not"}); } @@ -61,9 +61,9 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { // with increments StringReader reader = new StringReader(sb.toString()); - final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT); + final StandardTokenizer input = new StandardTokenizer(); input.setReader(reader); - TypeTokenFilter typeTokenFilter = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopSet); + TypeTokenFilter typeTokenFilter = new TypeTokenFilter(input, stopSet); testPositons(typeTokenFilter); } @@ -85,9 +85,9 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { public void testTypeFilterWhitelist() throws IOException { StringReader reader = new StringReader("121 is palindrome, while 123 is not"); Set stopTypes = Collections.singleton(""); - final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + final StandardTokenizer input = new StandardTokenizer(newAttributeFactory()); input.setReader(reader); - TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes, true); + TokenStream stream = new TypeTokenFilter(input, stopTypes, true); assertTokenStreamContents(stream, new String[]{"121", "123"}); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java index 75cfc09ca0f..b45e1b00067 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java @@ -20,14 +20,13 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer; -import org.apache.lucene.util.Version; import java.io.IOException; import java.util.Arrays; public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase { - private Analyzer a = new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT); + private Analyzer a = new UAX29URLEmailAnalyzer(); public void testHugeDoc() throws IOException { StringBuilder sb = new StringBuilder(); @@ -344,6 +343,6 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new UAX29URLEmailAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index 865e6c6b46f..edaa781fbdd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -5,10 +5,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; import 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index 865e6c6b46f..edaa781fbdd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -5,10 +5,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.util.Version; import java.io.BufferedReader; import java.io.IOException; @@ -47,7 +45,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(input)); BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -56,7 +54,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -103,7 +101,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { private Analyzer urlAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // Tokenize arbitrary length URLs TokenFilter filter = new URLFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); @@ -113,7 +111,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { private Analyzer emailAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); TokenFilter filter = new EmailFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java index e668a9da770..91b0be7c3f8 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test the CzechAnalyzer @@ -33,25 +32,25 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase { public void testStopWord() throws Exception { - assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem", + assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); } public void testReusableTokenStream() throws Exception { - Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT); + Analyzer analyzer = new CzechAnalyzer(); assertAnalyzesTo(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); assertAnalyzesTo(analyzer, "Česká Republika", new String[] { "česk", 
"republik" }); } public void testWithStemExclusionSet() throws IOException{ - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); set.add("hole"); - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set); assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"}); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CzechAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CzechAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java index 36cdd59f49f..c7b4c7ee072 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java @@ -41,7 +41,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how masculine noun forms conflate */ public void testMasculineNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); /* animate ending with a hard consonant */ assertAnalyzesTo(cz, "pán", new String[] { "pán" }); @@ -109,7 +109,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how feminine noun forms conflate */ public void testFeminineNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); /* ending with hard consonant */ assertAnalyzesTo(cz, "kost", new String[] { "kost" }); @@ -153,7 +153,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how neuter noun forms conflate */ public void testNeuterNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); /* ending with o */ assertAnalyzesTo(cz, "město", new String[] { "měst" }); @@ -196,7 +196,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how adjectival forms conflate */ public void testAdjectives() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); /* ending with ý/á/é */ assertAnalyzesTo(cz, "mladý", new String[] { "mlad" }); @@ -224,7 +224,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test some possessive suffixes */ public void testPossessive() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); assertAnalyzesTo(cz, "Karlův", new String[] { "karl" }); assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" }); } @@ -233,7 +233,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test some exceptional rules, implemented as rewrites. */ public void testExceptions() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); + CzechAnalyzer cz = new CzechAnalyzer(); /* rewrite of št -> sk */ assertAnalyzesTo(cz, "český", new String[] { "česk" }); @@ -273,13 +273,13 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test that very short words are not stemmed. 
   */
  public void testDontStem() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer();
     assertAnalyzesTo(cz, "e", new String[] { "e" });
     assertAnalyzesTo(cz, "zi", new String[] { "zi" });
   }
 
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(1, true);
     set.add("hole");
     final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
     in.setReader(new StringReader("hole desek"));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
index eddf531d0de..a0a591060b5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new DanishAnalyzer(TEST_VERSION_CURRENT);
+    new DanishAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new DanishAnalyzer();
     // stemming
     checkOneTerm(a, "undersøg", "undersøg");
     checkOneTerm(a, "undersøgelse", "undersøg");
@@ -42,8 +42,8 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false);
-    Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("undersøgelse"), false);
+    Analyzer a = new DanishAnalyzer(
         DanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "undersøgelse", "undersøgelse");
     checkOneTerm(a, "undersøg", "undersøg");
@@ -51,6 +51,6 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new DanishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new DanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
index 731dc2b1789..fd0cf20e583 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
@@ -28,16 +28,16 @@ import org.apache.lucene.analysis.util.CharArraySet;
 public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new GermanAnalyzer();
     checkOneTerm(a, "Tisch", "tisch");
     checkOneTerm(a, "Tische", "tisch");
     checkOneTerm(a, "Tischen", "tisch");
   }
 
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet( 1, true);
     set.add("fischen");
-    final LowerCaseTokenizer in = new LowerCaseTokenizer(TEST_VERSION_CURRENT);
+    final LowerCaseTokenizer in = new LowerCaseTokenizer();
     in.setReader(new StringReader("Fischen Trinken"));
     GermanStemFilter filter = new GermanStemFilter(
         new SetKeywordMarkerFilter(in, set));
@@ -45,8 +45,8 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testStemExclusionTable() throws Exception {
-    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET,
-        new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false));
+    GermanAnalyzer a = new GermanAnalyzer( CharArraySet.EMPTY_SET,
+        new CharArraySet( asSet("tischen"), false));
     checkOneTerm(a, "tischen", "tischen");
   }
 
@@ -54,7 +54,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
    * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
    */
   public void testGermanSpecials() throws Exception {
-    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
+    GermanAnalyzer a = new GermanAnalyzer();
     // a/o/u + e is equivalent to the umlaut form
     checkOneTerm(a, "Schaltflächen", "schaltflach");
     checkOneTerm(a, "Schaltflaechen", "schaltflach");
@@ -62,6 +62,6 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new GermanAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new GermanAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
index 7ed23a0c161..b9a6bd31098 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
@@ -49,7 +49,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
index 830cb7e52d5..a8b7e7b1e91 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
@@ -56,7 +56,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
index 3132787f292..1b49c409ccd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@@ -44,7 +44,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
     protected TokenStreamComponents createComponents(String fieldName) {
       Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false);
       return new TokenStreamComponents(t,
-          new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+          new GermanStemFilter(new LowerCaseFilter(t)));
     }
   };
 
@@ -55,7 +55,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
index 86dae1ee6d1..d416898cb08 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
@@ -31,7 +31,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
    * @throws Exception in case an error occurs
    */
  public void testAnalyzer() throws Exception {
-    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new GreekAnalyzer();
     // Verify the correct analysis of capitals and small accented letters, and
     // stemming
     assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
@@ -48,7 +48,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new GreekAnalyzer();
     // Verify the correct analysis of capitals and small accented letters, and
     // stemming
     assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
@@ -66,6 +66,6 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new GreekAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new GreekAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
index 50813edaf2c..cd5a2c1b105 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 
 public class TestGreekStemmer extends BaseTokenStreamTestCase {
-  Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+  Analyzer a = new GreekAnalyzer();
 
   public void testMasculineNouns() throws Exception {
     // -ος
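For reference, every hunk in this patch applies the same mechanical change: the leading Version (TEST_VERSION_CURRENT) argument is dropped from analyzer, tokenizer, filter, and CharArraySet constructors. A minimal sketch of the resulting call sites, following the TestGermanAnalyzer hunk above; the wrapper class name is hypothetical and the snippet is an illustration, not part of the patch:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.LowerCaseTokenizer;
    import org.apache.lucene.analysis.de.GermanStemFilter;
    import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class PostPatchUsageSketch {
      public static TokenStream build() throws IOException {
        // was: new CharArraySet(TEST_VERSION_CURRENT, 1, true)
        CharArraySet keywords = new CharArraySet(1, true);
        keywords.add("fischen");
        // was: new LowerCaseTokenizer(TEST_VERSION_CURRENT)
        LowerCaseTokenizer in = new LowerCaseTokenizer();
        in.setReader(new StringReader("Fischen Trinken"));
        // keyword-marked terms pass through the stemmer unchanged
        return new GermanStemFilter(new SetKeywordMarkerFilter(in, keywords));
      }
    }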
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
index 82b2b036be1..3844cbd7960 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new EnglishAnalyzer(TEST_VERSION_CURRENT);
+    new EnglishAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new EnglishAnalyzer();
     // stemming
     checkOneTerm(a, "books", "book");
     checkOneTerm(a, "book", "book");
@@ -46,8 +46,8 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false);
-    Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("books"), false);
+    Analyzer a = new EnglishAnalyzer(
         EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "books", "books");
     checkOneTerm(a, "book", "book");
@@ -55,6 +55,6 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new EnglishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new EnglishAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
index 8c9b377ad19..36fbf4f5dfc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
@@ -53,7 +53,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet( 1, true);
     set.add("yourselves");
     Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
     tokenizer.setReader(new StringReader("yourselves yours"));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
index 2338906e810..9a6c06f0d31 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new SpanishAnalyzer(TEST_VERSION_CURRENT);
+    new SpanishAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new SpanishAnalyzer();
     // stemming
     checkOneTerm(a, "chicana", "chican");
     checkOneTerm(a, "chicano", "chican");
@@ -42,8 +42,8 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false);
-    Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("chicano"), false);
+    Analyzer a = new SpanishAnalyzer(
         SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "chicana", "chican");
     checkOneTerm(a, "chicano", "chicano");
@@ -51,6 +51,6 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new SpanishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new SpanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
index ca9aa67c151..d398ec9d437 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
@@ -27,12 +27,12 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new BasqueAnalyzer(TEST_VERSION_CURRENT);
+    new BasqueAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new BasqueAnalyzer();
     // stemming
     checkOneTerm(a, "zaldi", "zaldi");
     checkOneTerm(a, "zaldiak", "zaldi");
@@ -42,8 +42,8 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false);
-    Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("zaldiak"), false);
+    Analyzer a = new BasqueAnalyzer(
         BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "zaldiak", "zaldiak");
     checkOneTerm(a, "mendiari", "mendi");
@@ -51,6 +51,6 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new BasqueAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new BasqueAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
index 64510cf40b5..67dace3253c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
@@ -31,7 +31,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new PersianAnalyzer(TEST_VERSION_CURRENT);
+    new PersianAnalyzer();
   }
 
   /**
@@ -42,7 +42,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbs() throws Exception {
-    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new PersianAnalyzer();
     // active present indicative
     assertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
     // active preterite indicative
@@ -118,7 +118,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbsDefective() throws Exception {
-    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new PersianAnalyzer();
     // active present indicative
     assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
     // active preterite indicative
@@ -189,7 +189,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * nouns, removing the plural -ha.
    */
   public void testBehaviorNouns() throws Exception {
-    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
     assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
   }
@@ -199,7 +199,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * (lowercased, etc)
    */
   public void testBehaviorNonPersian() throws Exception {
-    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
   }
 
@@ -207,7 +207,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * Basic test ensuring that tokenStream works correctly.
    */
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
     assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
   }
@@ -216,14 +216,14 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * Test that custom stopwords work, and are not case-sensitive.
    */
   public void testCustomStopwords() throws Exception {
-    PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT,
-        new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false));
+    PersianAnalyzer a = new PersianAnalyzer(
+        new CharArraySet( asSet("the", "and", "a"), false));
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new PersianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new PersianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
index e3ef862e6a2..3fb7ce85369 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new FinnishAnalyzer(TEST_VERSION_CURRENT);
+    new FinnishAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new FinnishAnalyzer();
     // stemming
     checkOneTerm(a, "edeltäjiinsä", "edeltäj");
     checkOneTerm(a, "edeltäjistään", "edeltäj");
@@ -42,8 +42,8 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
-    Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("edeltäjistään"), false);
+    Analyzer a = new FinnishAnalyzer(
         FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "edeltäjiinsä", "edeltäj");
     checkOneTerm(a, "edeltäjistään", "edeltäjistään");
@@ -51,6 +51,6 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new FinnishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new FinnishAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
index 2b02b8c0e2b..985ec1abc5c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
@@ -48,7 +48,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("edeltäjistään"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
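The TestPersianAnalyzer hunk above also shows the post-patch single-argument form that takes only a custom stopword set. A compilable sketch of that call outside the test framework; the class name is hypothetical, and since asSet is a test-framework helper, Arrays.asList is used here instead, matching the CharArraySet(Collection, boolean) form that appears later in this patch (TestHunspellStemFilter):

    import java.util.Arrays;
    import org.apache.lucene.analysis.fa.PersianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class CustomStopwordsSketch {
      public static PersianAnalyzer build() {
        // was: new PersianAnalyzer(TEST_VERSION_CURRENT,
        //          new CharArraySet(TEST_VERSION_CURRENT, ..., false))
        CharArraySet stopwords = new CharArraySet(Arrays.asList("the", "and", "a"), false);
        return new PersianAnalyzer(stopwords);
      }
    }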
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
index 6680b7e57f2..112573b358c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
@@ -22,7 +22,6 @@ import java.io.IOException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 
 /**
  * Test case for FrenchAnalyzer.
@@ -32,7 +31,7 @@ import org.apache.lucene.util.Version;
 public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
 
   public void testAnalyzer() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    FrenchAnalyzer fa = new FrenchAnalyzer();
 
     assertAnalyzesTo(fa, "", new String[] {
     });
@@ -115,7 +114,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    FrenchAnalyzer fa = new FrenchAnalyzer();
     // stopwords
     assertAnalyzesTo(
         fa,
@@ -136,20 +135,20 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testExclusionTableViaCtor() throws Exception {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet( 1, true);
     set.add("habitable");
-    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
+    FrenchAnalyzer fa = new FrenchAnalyzer(
         CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
         "chist" });
 
-    fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
+    fa = new FrenchAnalyzer( CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
         "chist" });
   }
 
   public void testElision() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    FrenchAnalyzer fa = new FrenchAnalyzer();
     assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouil" });
   }
 
@@ -157,18 +156,18 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
    * Test that stopwords are not case sensitive
    */
   public void testStopwordsCasing() throws IOException {
-    FrenchAnalyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    FrenchAnalyzer a = new FrenchAnalyzer();
     assertAnalyzesTo(a, "Votre", new String[] { });
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new FrenchAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new FrenchAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 
   /** test accent-insensitive */
   public void testAccentInsensitive() throws Exception {
-    Analyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new FrenchAnalyzer();
     checkOneTerm(a, "sécuritaires", "securitair");
     checkOneTerm(a, "securitaires", "securitair");
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
index 354a252f640..ce0a038fa82 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
@@ -179,7 +179,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("chevaux"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
index 62e06cbc523..5330e0ed67c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
@@ -58,7 +58,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("chevaux"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
index 8db7c66e5b1..994dff5adda 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new IrishAnalyzer(TEST_VERSION_CURRENT);
+    new IrishAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new IrishAnalyzer();
     // stemming
     checkOneTerm(a, "siopadóireacht", "siopadóir");
     checkOneTerm(a, "síceapatacha", "síceapaite");
@@ -42,15 +42,15 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of elisionfilter */
   public void testContractions() throws IOException {
-    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new IrishAnalyzer();
     assertAnalyzesTo(a, "b'fhearr m'athair", new String[] { "fearr", "athair" });
   }
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("feirmeoireacht"), false);
-    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("feirmeoireacht"), false);
+    Analyzer a = new IrishAnalyzer(
         IrishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "feirmeoireacht", "feirmeoireacht");
     checkOneTerm(a, "siopadóireacht", "siopadóir");
@@ -58,7 +58,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test special hyphen handling */
   public void testHyphens() throws IOException {
-    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new IrishAnalyzer();
     assertAnalyzesTo(a, "n-athair", new String[] { "athair" },
         new int[] { 2 });
   }
@@ -66,6 +66,6 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new IrishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new IrishAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
index 0ce5d21aaba..3d5e47e08d5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new GalicianAnalyzer(TEST_VERSION_CURRENT);
+    new GalicianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new GalicianAnalyzer();
     // stemming
     checkOneTerm(a, "correspondente", "correspond");
     checkOneTerm(a, "corresponderá", "correspond");
@@ -42,8 +42,8 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false);
-    Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("correspondente"), false);
+    Analyzer a = new GalicianAnalyzer(
         GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "correspondente", "correspondente");
     checkOneTerm(a, "corresponderá", "correspond");
@@ -51,6 +51,6 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new GalicianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new GalicianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
index 8980d07e2a3..b309bb422a5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
@@ -54,7 +54,7 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("elefantes"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
index bfb4f77f543..be9eadaf9de 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
@@ -28,25 +28,25 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new HindiAnalyzer(TEST_VERSION_CURRENT);
+    new HindiAnalyzer();
   }
 
   public void testBasics() throws Exception {
-    Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new HindiAnalyzer();
     // two ways to write 'hindi' itself.
     checkOneTerm(a, "हिन्दी", "हिंद");
     checkOneTerm(a, "हिंदी", "हिंद");
   }
 
   public void testExclusionSet() throws Exception {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false);
-    Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("हिंदी"), false);
+    Analyzer a = new HindiAnalyzer(
         HindiAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "हिंदी", "हिंदी");
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new HindiAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new HindiAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
index a395def06ac..5caff3fb30d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new HungarianAnalyzer(TEST_VERSION_CURRENT);
+    new HungarianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new HungarianAnalyzer();
     // stemming
     checkOneTerm(a, "babakocsi", "babakocs");
     checkOneTerm(a, "babakocsijáért", "babakocs");
@@ -42,8 +42,8 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
-    Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("babakocsi"), false);
+    Analyzer a = new HungarianAnalyzer(
         HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "babakocsi", "babakocsi");
     checkOneTerm(a, "babakocsijáért", "babakocs");
@@ -51,6 +51,6 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new HungarianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new HungarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
index 38213e068ff..46478894113 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
@@ -49,7 +49,7 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
+    final CharArraySet exclusionSet = new CharArraySet( asSet("babakocsi"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
index 5656f6ee753..9deaca10795 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
@@ -64,7 +64,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
 
     // assert with keyword marker
     tokenizer = whitespaceMockTokenizer("lucene is awesome");
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
+    CharArraySet set = new CharArraySet( Arrays.asList("Lucene"), true);
     filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary);
     assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
index ef74e391b46..2e046189d01 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ArmenianAnalyzer(TEST_VERSION_CURRENT);
+    new ArmenianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new ArmenianAnalyzer();
     // stemming
     checkOneTerm(a, "արծիվ", "արծ");
     checkOneTerm(a, "արծիվներ", "արծ");
@@ -42,8 +42,8 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false);
-    Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("արծիվներ"), false);
+    Analyzer a = new ArmenianAnalyzer(
         ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "արծիվներ", "արծիվներ");
     checkOneTerm(a, "արծիվ", "արծ");
@@ -51,6 +51,6 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ArmenianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new ArmenianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
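Several hunks above (TestGermanStemFilter, TestFinnishLightStemFilter, TestHungarianLightStemFilter) exercise the other recurring shape in this patch: an anonymous Analyzer whose createComponents builds a filter chain with no Version argument anywhere. A self-contained sketch of that shape with hypothetical names; the no-argument WhitespaceTokenizer constructor used here appears later in this patch, in the TestStemmerOverrideFilter hunk:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.de.GermanStemFilter;

    public class ComponentsSketch {
      public static final Analyzer ANALYZER = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          // was: new WhitespaceTokenizer(TEST_VERSION_CURRENT)
          Tokenizer t = new WhitespaceTokenizer();
          // was: new LowerCaseFilter(TEST_VERSION_CURRENT, t)
          return new TokenStreamComponents(t,
              new GermanStemFilter(new LowerCaseFilter(t)));
        }
      };
    }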
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
index ce3cd6edd20..a134b31c015 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new IndonesianAnalyzer(TEST_VERSION_CURRENT);
+    new IndonesianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new IndonesianAnalyzer();
     // stemming
     checkOneTerm(a, "peledakan", "ledak");
     checkOneTerm(a, "pembunuhan", "bunuh");
@@ -42,8 +42,8 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false);
-    Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("peledakan"), false);
+    Analyzer a = new IndonesianAnalyzer(
         IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "peledakan", "peledakan");
     checkOneTerm(a, "pembunuhan", "bunuh");
@@ -51,6 +51,6 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new IndonesianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new IndonesianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
index c93781cf7e1..ec359fbd44b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
@@ -18,24 +18,21 @@ package org.apache.lucene.analysis.it;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 
 public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ItalianAnalyzer(TEST_VERSION_CURRENT);
+    new ItalianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new ItalianAnalyzer();
     // stemming
     checkOneTerm(a, "abbandonata", "abbandonat");
     checkOneTerm(a, "abbandonati", "abbandonat");
@@ -45,8 +42,8 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
-    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("abbandonata"), false);
+    Analyzer a = new ItalianAnalyzer(
         ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "abbandonata", "abbandonata");
     checkOneTerm(a, "abbandonati", "abbandonat");
@@ -54,12 +51,12 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ItalianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new ItalianAnalyzer(), 1000*RANDOM_MULTIPLIER);
   }
 
   /** test that the elisionfilter is working */
   public void testContractions() throws IOException {
-    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new ItalianAnalyzer();
     assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
     assertAnalyzesTo(a, "l'Italiano", new String[] { "italian" });
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
index 33ac2e3f12b..4bf69a503cf 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new LatvianAnalyzer(TEST_VERSION_CURRENT);
+    new LatvianAnalyzer();
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new LatvianAnalyzer();
     // stemming
     checkOneTerm(a, "tirgiem", "tirg");
     checkOneTerm(a, "tirgus", "tirg");
@@ -42,8 +42,8 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
-    Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT,
+    CharArraySet exclusionSet = new CharArraySet( asSet("tirgiem"), false);
+    Analyzer a = new LatvianAnalyzer(
         LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "tirgiem", "tirgiem");
     checkOneTerm(a, "tirgus", "tirg");
@@ -51,6 +51,6 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new LatvianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new LatvianAnalyzer(), 1000*RANDOM_MULTIPLIER);
  }
 }
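The language-analyzer tests above all converge on the same two-argument constructor: the default stop set plus a stem-exclusion set, now without the leading Version. A sketch of that form taken from the Latvian hunk directly above (class name illustrative, Arrays.asList standing in for the test-only asSet helper):

    import java.util.Arrays;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.lv.LatvianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    public class ExclusionSketch {
      public static Analyzer build() {
        // "tirgiem" is protected from stemming; the default stopwords still apply
        CharArraySet exclusions = new CharArraySet(Arrays.asList("tirgiem"), false);
        return new LatvianAnalyzer(LatvianAnalyzer.getDefaultStopSet(), exclusions);
      }
    }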
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
index 96998d5d046..0d1141ef77a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@@ -37,7 +37,7 @@ import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
 /** Tests {@link CapitalizationFilter} */
 public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
   public void testCapitalization() throws Exception {
-    CharArraySet keep = new CharArraySet(TEST_VERSION_CURRENT,
+    CharArraySet keep = new CharArraySet(
         Arrays.asList("and", "the", "it", "BIG"), false);
 
     assertCapitalizesTo("kiTTEN", new String[] { "Kitten" },
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
index 2a158ec2367..442cfe2b54f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
@@ -31,7 +31,7 @@ import org.junit.Test;
 public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = whitespaceMockTokenizer("short toolong evenmuchlongertext a ab toolong foo");
-    CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, 2, 6);
+    CodepointCountFilter filter = new CodepointCountFilter(stream, 2, 6);
     assertTokenStreamContents(filter,
       new String[]{"short", "ab", "foo"},
       new int[]{1, 4, 2}
@@ -43,7 +43,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
+        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(tokenizer, 0, 5));
       }
     };
     checkOneTerm(a, "", "");
@@ -63,7 +63,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
     boolean expected = count >= min && count <= max;
     TokenStream stream = new KeywordTokenizer();
     ((Tokenizer)stream).setReader(new StringReader(text));
-    stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
+    stream = new CodepointCountFilter(stream, min, max);
     stream.reset();
     assertEquals(expected, stream.incrementToken());
     stream.end();
@@ -76,6 +76,6 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
    */
   @Test(expected = IllegalArgumentException.class)
   public void testIllegalArguments() throws Exception {
-    new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
+    new CodepointCountFilter(whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
index 465c54c31f1..50b5edcd717 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
@@ -40,12 +40,12 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
 
     // Test Stopwords
     TokenStream stream = whitespaceMockTokenizer(input);
-    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+    stream = new KeepWordFilter(stream, new CharArraySet( words, true));
     assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
 
     // Now force case
     stream = whitespaceMockTokenizer(input);
-    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+    stream = new KeepWordFilter(stream, new CharArraySet(words, false));
     assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
   }
 
@@ -60,7 +60,7 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+        TokenStream stream = new KeepWordFilter(tokenizer, new CharArraySet( words, true));
         return new TokenStreamComponents(tokenizer, stream);
       }
     };
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
index a2853060d9a..eecc9e81aa1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@@ -38,13 +38,13 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
 
   @Test
   public void testSetFilterIncrementToken() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
+    CharArraySet set = new CharArraySet( 5, true);
     set.add("lucenefox");
     String[] output = new String[] { "the", "quick", "brown", "LuceneFox", "jumps" };
     assertTokenStreamContents(new LowerCaseFilterMock(
         new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), set)), output);
-    CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
+    CharArraySet mixedCaseSet = new CharArraySet( asSet("LuceneFox"), false);
     assertTokenStreamContents(new LowerCaseFilterMock(
         new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), mixedCaseSet)), output);
     CharArraySet set2 = set;
@@ -72,8 +72,8 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
         new SetKeywordMarkerFilter(
             new SetKeywordMarkerFilter(
                 whitespaceMockTokenizer("Dogs Trees Birds Houses"),
-                new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)),
-            new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
+                new CharArraySet( asSet("Birds", "Houses"), false)),
+            new CharArraySet( asSet("Dogs", "Trees"), false)));
 
     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds",
         "Houses" });
@@ -91,7 +91,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
         new PatternKeywordMarkerFilter(
             whitespaceMockTokenizer("Dogs Trees Birds Houses"),
             Pattern.compile("Birds|Houses")),
-        new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
+        new CharArraySet( asSet("Dogs", "Trees"), false)));
 
     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds",
         "Houses" });
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
index 0aa47149601..89e377f4fc7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
@@ -33,7 +33,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
 
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = whitespaceMockTokenizer("short toolong evenmuchlongertext a ab toolong foo");
-    LengthFilter filter = new LengthFilter(TEST_VERSION_CURRENT, stream, 2, 6);
+    LengthFilter filter = new LengthFilter(stream, 2, 6);
     assertTokenStreamContents(filter,
       new String[]{"short", "ab", "foo"},
       new int[]{1, 4, 2}
@@ -45,7 +45,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
+        return new TokenStreamComponents(tokenizer, new LengthFilter(tokenizer, 0, 5));
       }
     };
     checkOneTerm(a, "", "");
@@ -56,6 +56,6 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
    */
   @Test(expected = IllegalArgumentException.class)
   public void testIllegalArguments() throws Exception {
-    new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
+    new LengthFilter(whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
index 75a9ab067a6..7a20192c557 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
@@ -211,7 +211,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
   @Test
   public void testPositionIncrements() throws Exception {
     final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
-    final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("NUTCH")), false);
+    final CharArraySet protWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
     Analyzer a = new Analyzer() {
@@ -300,8 +300,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
       @Override
       public TokenStreamComponents createComponents(String field) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
-            tokenizer, StandardAnalyzer.STOP_WORDS_SET);
+        StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
         return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(filter, flags, protWords));
       }
     };
@@ -333,7 +332,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = random().nextInt(512);
     final CharArraySet protectedWords;
     if (random().nextBoolean()) {
-      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
@@ -356,7 +355,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
       final int flags = i;
       final CharArraySet protectedWords;
       if (random.nextBoolean()) {
-        protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+        protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
       } else {
         protectedWords = null;
       }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
index af69fa851e2..ff833866472 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
@@ -39,10 +39,10 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
     String text = "Qwerty";
 
     Map<String,Analyzer> analyzerPerField =
-        Collections.singletonMap("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
+        Collections.singletonMap("special", new SimpleAnalyzer());
 
     PerFieldAnalyzerWrapper analyzer =
-      new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
+      new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
 
     try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
       CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
@@ -72,8 +72,8 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
   public void testReuseWrapped() throws Exception {
     final String text = "Qwerty";
 
-    final Analyzer specialAnalyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
-    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    final Analyzer specialAnalyzer = new SimpleAnalyzer();
+    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
 
     TokenStream ts1, ts2, ts3, ts4;
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
index a75bfa038cd..4ef1536f0d9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@@ -113,7 +113,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
         output.add(entry.getValue());
       }
     }
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+    Tokenizer tokenizer = new WhitespaceTokenizer();
     tokenizer.setReader(new StringReader(input.toString()));
     TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
         tokenizer, builder.build()));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
index c770de15bbf..0ea5140af2d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.Version;
 
 /**
  */
@@ -51,7 +50,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
         new Token(new String(ccc, 0, ccc.length), 11, 15),
         new Token(new String(whitespace, 0, whitespace.length), 16, 20),
         new Token(new String(empty, 0, empty.length), 21, 21));
-    ts = new TrimFilter(TEST_VERSION_CURRENT, ts);
+    ts = new TrimFilter(ts);
 
     assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
   }
@@ -100,7 +99,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
+        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer));
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
@@ -111,8 +110,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        final Version version = TEST_VERSION_CURRENT;
-        return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
+        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer));
       }
     };
     checkOneTerm(a, "", "");
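The miscellaneous-filter hunks above apply the same signature change to individual TokenFilters (StopFilter, LengthFilter, TrimFilter, and friends). A sketch chaining the post-patch constructors exactly as they appear in those hunks; the class and method names are illustrative, not from the patch:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.LengthFilter;
    import org.apache.lucene.analysis.miscellaneous.TrimFilter;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;

    public class FilterChainSketch {
      public static TokenStream build() {
        Tokenizer tok = new WhitespaceTokenizer();      // was: (TEST_VERSION_CURRENT)
        // was: new StopFilter(TEST_VERSION_CURRENT, tok, ...)
        TokenStream ts = new StopFilter(tok, StandardAnalyzer.STOP_WORDS_SET);
        // was: new LengthFilter(TEST_VERSION_CURRENT, ts, 2, 6)
        ts = new LengthFilter(ts, 2, 6);
        // was: new TrimFilter(TEST_VERSION_CURRENT, ts)
        return new TrimFilter(ts);
      }
    }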
- WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), DEFAULT_WORD_DELIM_TABLE, flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "foobar", "bar" }, new int[] { 5, 5, 9 }, new int[] { 8, 12, 12 }); - wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), DEFAULT_WORD_DELIM_TABLE, flags, null); + wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "bar", "foobar" }, @@ -80,7 +80,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Test public void testOffsetChange() throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "übelkeit" }, @@ -91,7 +91,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Test public void testOffsetChange2() throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), DEFAULT_WORD_DELIM_TABLE, flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "übelkeit" }, @@ -102,7 +102,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Test public void testOffsetChange3() throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "übelkeit" }, @@ -113,7 +113,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Test public void testOffsetChange4() throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, new String[] { "foo", "foobar", "bar"}, @@ -123,7 +123,7 @@ public class TestWordDelimiterFilter extends 
BaseTokenStreamTestCase { public void doSplit(final String input, String... output) throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, keywordMockTokenizer(input), + WordDelimiterFilter wdf = new WordDelimiterFilter(keywordMockTokenizer(input), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null); assertTokenStreamContents(wdf, output); @@ -167,7 +167,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception { int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS; flags |= (stemPossessive == 1) ? STEM_ENGLISH_POSSESSIVE : 0; - WordDelimiterFilter wdf = new WordDelimiterFilter(TEST_VERSION_CURRENT, keywordMockTokenizer(input), flags, null); + WordDelimiterFilter wdf = new WordDelimiterFilter(keywordMockTokenizer(input), flags, null); assertTokenStreamContents(wdf, output); } @@ -207,14 +207,14 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Test public void testPositionIncrements() throws Exception { final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE; - final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("NUTCH")), false); + final CharArraySet protWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false); /* analyzer that uses whitespace + wdf */ Analyzer a = new Analyzer() { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter( tokenizer, flags, protWords)); } @@ -242,7 +242,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter( new LargePosIncTokenFilter(tokenizer), flags, protWords)); } @@ -275,9 +275,8 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - StopFilter filter = new StopFilter(TEST_VERSION_CURRENT, - tokenizer, StandardAnalyzer.STOP_WORDS_SET); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, filter, flags, protWords)); + StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET); + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords)); } }; @@ -304,7 +303,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, null)); + return new 
TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null)); } }; @@ -324,7 +323,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, null)); + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null)); } }; @@ -342,7 +341,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { final int flags = random().nextInt(512); final CharArraySet protectedWords; if (random().nextBoolean()) { - protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false); + protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false); } else { protectedWords = null; } @@ -352,7 +351,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords)); + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords)); } }; // TODO: properly support positionLengthAttribute @@ -367,7 +366,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { final int flags = random().nextInt(512); final CharArraySet protectedWords; if (random().nextBoolean()) { - protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false); + protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false); } else { protectedWords = null; } @@ -377,7 +376,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords)); + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords)); } }; // TODO: properly support positionLengthAttribute @@ -391,7 +390,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { final int flags = i; final CharArraySet protectedWords; if (random.nextBoolean()) { - protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false); + protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false); } else { protectedWords = null; } @@ -400,7 +399,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); - return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords)); + return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords)); } }; // depending upon options, this thing may or may not preserve the empty term diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java index 062bfc16632..6b7d6b30384 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java @@ -52,7 +52,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { public void testInvalidInput() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 0); + new EdgeNGramTokenFilter(input, 0, 0); } catch (IllegalArgumentException e) { gotException = true; } @@ -62,7 +62,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { public void testInvalidInput2() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1); + new EdgeNGramTokenFilter(input, 2, 1); } catch (IllegalArgumentException e) { gotException = true; } @@ -72,7 +72,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { public void testInvalidInput3() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, -1, 2); + new EdgeNGramTokenFilter(input, -1, 2); } catch (IllegalArgumentException e) { gotException = true; } @@ -80,23 +80,23 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { } public void testFrontUnigram() throws Exception { - EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1); + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, 1, 1); assertTokenStreamContents(tokenizer, new String[]{"a"}, new int[]{0}, new int[]{5}); } public void testOversizedNgrams() throws Exception { - EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 6); + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, 6, 6); assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0]); } public void testFrontRangeOfNgrams() throws Exception { - EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3); + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, 1, 3); assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{5,5,5}); } public void testFilterPositions() throws Exception { TokenStream ts = whitespaceMockTokenizer("abcde vwxyz"); - EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 1, 3); + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(ts, 1, 3); assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc","v","vw","vwx"}, new int[]{0,0,0,6,6,6}, @@ -141,7 +141,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { public void testFirstTokenPositionIncrement() throws Exception { TokenStream ts = whitespaceMockTokenizer("a abc"); ts = new PositionFilter(ts); // All but first token will get 0 position increment - EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3); + EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(ts, 2, 3); // The first token "a" will not be output, since it's smaller than the mingram size of 2. // The second token on input to EdgeNGramTokenFilter will have position increment of 0, // which should be increased to 1, since this is the first output token in the stream. 
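Every hunk in this patch applies the same mechanical rewrite: the leading Version (or TEST_VERSION_CURRENT) argument is dropped and the remaining arguments shift left. As a minimal before/after sketch of the n-gram chain, assuming the post-patch trunk API (the Version constant in the comments and the sample text are illustrative, not taken from this patch):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;

    class EdgeNGramMigrationSketch {
      static TokenStream edgeGrams(String text) {
        // Before LUCENE-5859 each component took a Version first, e.g.
        //   new WhitespaceTokenizer(Version.LUCENE_4_9) and
        //   new EdgeNGramTokenFilter(Version.LUCENE_4_9, tok, 1, 3).
        Tokenizer tok = new WhitespaceTokenizer();   // no Version argument anymore
        tok.setReader(new StringReader(text));
        return new EdgeNGramTokenFilter(tok, 1, 3);  // "abcde" -> "a", "ab", "abc"
      }
    }

Code that wants the old 4.3 n-gram semantics no longer gets them implicitly from a Version argument; it opts in through the explicit back-compat classes, as the testLucene43 hunk further down does when it swaps new NGramTokenFilter(Version.LUCENE_4_3, input, 2, 3) for new Lucene43NGramTokenFilter(input, 2, 3).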
@@ -155,14 +155,14 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { public void testSmallTokenInStream() throws Exception { input = whitespaceMockTokenizer("abc de fgh"); - EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3); + EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, 3, 3); assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}); } public void testReset() throws Exception { - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader("abcde")); - EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 3); + EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, 1, 3); assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{5,5,5}); tokenizer.setReader(new StringReader("abcde")); assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{5,5,5}); @@ -179,7 +179,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, - new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max)); + new EdgeNGramTokenFilter(tokenizer, min, max)); } }; checkRandomData(random(), a, 100*RANDOM_MULTIPLIER); @@ -193,17 +193,17 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, - new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15)); + new EdgeNGramTokenFilter(tokenizer, 2, 15)); } }; checkAnalysisConsistency(random, a, random.nextBoolean(), ""); } public void testGraphs() throws IOException { - TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT); + TokenStream tk = new LetterTokenizer(); ((Tokenizer)tk).setReader(new StringReader("abc d efgh ij klmno p q")); tk = new ShingleFilter(tk); - tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10); + tk = new EdgeNGramTokenFilter(tk, 7, 10); assertTokenStreamContents(tk, new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6,11,11,14 }, @@ -221,7 +221,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { final int maxGram = TestUtil.nextInt(random(), minGram, 10); TokenStream tk = new KeywordTokenizer(); ((Tokenizer)tk).setReader(new StringReader(s)); - tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram); + tk = new EdgeNGramTokenFilter(tk, minGram, maxGram); final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class); final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class); tk.reset(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java index 99b2fb7a6a6..2a09d19a9c4 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java @@ -44,7 +44,7 @@ public class EdgeNGramTokenizerTest extends 
BaseTokenStreamTestCase { public void testInvalidInput() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 0, 0).setReader(input); + new EdgeNGramTokenizer(0, 0).setReader(input); } catch (IllegalArgumentException e) { gotException = true; } @@ -54,7 +54,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase { public void testInvalidInput2() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 2, 1).setReader(input); + new EdgeNGramTokenizer(2, 1).setReader(input); } catch (IllegalArgumentException e) { gotException = true; } @@ -64,7 +64,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase { public void testInvalidInput3() throws Exception { boolean gotException = false; try { - new EdgeNGramTokenizer(TEST_VERSION_CURRENT, -1, 2).setReader(input); + new EdgeNGramTokenizer(-1, 2).setReader(input); } catch (IllegalArgumentException e) { gotException = true; } @@ -72,25 +72,25 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase { } public void testFrontUnigram() throws Exception { - EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 1); + EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(1, 1); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a"}, new int[]{0}, new int[]{1}, 5 /* abcde */); } public void testOversizedNgrams() throws Exception { - EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 6, 6); + EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(6, 6); tokenizer.setReader(input);; assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */); } public void testFrontRangeOfNgrams() throws Exception { - EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3); + EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(1, 3); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5 /* abcde */); } public void testReset() throws Exception { - EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3); + EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(1, 3); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5 /* abcde */); tokenizer.setReader(new StringReader("abcde")); @@ -106,7 +106,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, min, max); + Tokenizer tokenizer = new EdgeNGramTokenizer(min, max); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -116,7 +116,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase { } public void testTokenizerPositions() throws Exception { - EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3); + EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(1, 3); tokenizer.setReader(new StringReader("abcde")); assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java index 5dfc9f4df94..3aa3271d456 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java @@ -50,7 +50,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { public void testInvalidInput() throws Exception { boolean gotException = false; try { - new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1); + new NGramTokenFilter(input, 2, 1); } catch (IllegalArgumentException e) { gotException = true; } @@ -60,7 +60,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { public void testInvalidInput2() throws Exception { boolean gotException = false; try { - new NGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 1); + new NGramTokenFilter(input, 0, 1); } catch (IllegalArgumentException e) { gotException = true; } @@ -68,17 +68,17 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { } public void testUnigrams() throws Exception { - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1); + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1); assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,0,0,0,0}, new int[]{5,5,5,5,5}, new int[]{1,0,0,0,0}); } public void testBigrams() throws Exception { - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2); + NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2); assertTokenStreamContents(filter, new String[]{"ab","bc","cd","de"}, new int[]{0,0,0,0}, new int[]{5,5,5,5}, new int[]{1,0,0,0}); } public void testNgrams() throws Exception { - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3); + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3); assertTokenStreamContents(filter, new String[]{"a","ab","abc","b","bc","bcd","c","cd","cde","d","de","e"}, new int[]{0,0,0,0,0,0,0,0,0,0,0,0}, @@ -90,7 +90,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { } public void testNgramsNoIncrement() throws Exception { - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3); + NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3); assertTokenStreamContents(filter, new String[]{"a","ab","abc","b","bc","bcd","c","cd","cde","d","de","e"}, new int[]{0,0,0,0,0,0,0,0,0,0,0,0}, @@ -102,20 +102,20 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { } public void testOversizedNgrams() throws Exception { - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7); + NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7); assertTokenStreamContents(filter, new String[0], new int[0], new int[0]); } public void testSmallTokenInStream() throws Exception { input = whitespaceMockTokenizer("abc de fgh"); - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3); + NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3); assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}, new int[] {1, 2}); } public void testReset() throws Exception { - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader("abcde")); - NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1); + NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1); assertTokenStreamContents(filter, new 
String[]{"a","b","c","d","e"}, new int[]{0,0,0,0,0}, new int[]{5,5,5,5,5}, new int[]{1,0,0,0,0}); tokenizer.setReader(new StringReader("abcde")); assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,0,0,0,0}, new int[]{5,5,5,5,5}, new int[]{1,0,0,0,0}); @@ -131,7 +131,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); TokenFilter filters = new ASCIIFoldingFilter(tokenizer); - filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2); + filters = new NGramTokenFilter(filters, 2, 2); return new TokenStreamComponents(tokenizer, filters); } }; @@ -152,7 +152,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, - new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max)); + new NGramTokenFilter(tokenizer, min, max)); } }; checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20); @@ -166,14 +166,14 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, - new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15)); + new NGramTokenFilter(tokenizer, 2, 15)); } }; checkAnalysisConsistency(random, a, random.nextBoolean(), ""); } public void testLucene43() throws IOException { - NGramTokenFilter filter = new NGramTokenFilter(Version.LUCENE_4_3, input, 2, 3); + TokenFilter filter = new Lucene43NGramTokenFilter(input, 2, 3); assertTokenStreamContents(filter, new String[]{"ab","bc","cd","de","abc","bcd","cde"}, new int[]{0,1,2,3,0,1,2}, @@ -191,7 +191,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase { final int maxGram = TestUtil.nextInt(random(), minGram, 10); TokenStream tk = new KeywordTokenizer(); ((Tokenizer)tk).setReader(new StringReader(s)); - tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram); + tk = new NGramTokenFilter(tk, minGram, maxGram); final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class); final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class); tk.reset(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java index a7aa2604fb7..7bd19dca0cc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java @@ -48,7 +48,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { public void testInvalidInput() throws Exception { boolean gotException = false; try { - NGramTokenizer tok = new NGramTokenizer(TEST_VERSION_CURRENT, 2, 1); + NGramTokenizer tok = new NGramTokenizer(2, 1); tok.setReader(input); } catch (IllegalArgumentException e) { gotException = true; @@ -59,7 +59,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { public void testInvalidInput2() throws Exception { boolean gotException = false; try { - NGramTokenizer tok = new NGramTokenizer(TEST_VERSION_CURRENT, 0, 1); + NGramTokenizer tok = new NGramTokenizer(0, 1); 
tok.setReader(input); } catch (IllegalArgumentException e) { gotException = true; @@ -68,19 +68,19 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { } public void testUnigrams() throws Exception { - NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 1); + NGramTokenizer tokenizer = new NGramTokenizer(1, 1); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}, 5 /* abcde */); } public void testBigrams() throws Exception { - NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 2, 2); + NGramTokenizer tokenizer = new NGramTokenizer(2, 2); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"ab","bc","cd","de"}, new int[]{0,1,2,3}, new int[]{2,3,4,5}, 5 /* abcde */); } public void testNgrams() throws Exception { - NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 3); + NGramTokenizer tokenizer = new NGramTokenizer(1, 3); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a","ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e"}, @@ -95,13 +95,13 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { } public void testOversizedNgrams() throws Exception { - NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 6, 7); + NGramTokenizer tokenizer = new NGramTokenizer(6, 7); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */); } public void testReset() throws Exception { - NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 1); + NGramTokenizer tokenizer = new NGramTokenizer(1, 1); tokenizer.setReader(input); assertTokenStreamContents(tokenizer, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}, 5 /* abcde */); tokenizer.setReader(new StringReader("abcde")); @@ -116,7 +116,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, min, max); + Tokenizer tokenizer = new NGramTokenizer(min, max); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -161,7 +161,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase { for (int i = 0; i < codePoints.length; ++i) { offsets[i+1] = offsets[i] + Character.charCount(codePoints[i]); } - final Tokenizer grams = new NGramTokenizer(TEST_VERSION_CURRENT, minGram, maxGram, edgesOnly) { + final Tokenizer grams = new NGramTokenizer(minGram, maxGram, edgesOnly) { @Override protected boolean isTokenChar(int chr) { return nonTokenChars.indexOf(chr) < 0; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java index 1f554b44e0b..5df46afcbb0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test the Dutch Stem Filter, which only modifies the term text. 
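The analyzer tests that follow repeat the pattern one level up: whole analyzers and CharArraySet lose the Version parameter too. A hedged sketch mirroring the TestDutchStemmer hunks below (post-patch API; the exclusion term is the one used in the test):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.nl.DutchAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    class DutchAnalyzerMigrationSketch {
      static Analyzer withStemExclusion() {
        // was: new CharArraySet(TEST_VERSION_CURRENT, 1, true)
        CharArraySet exclusions = new CharArraySet(1, true);
        exclusions.add("lichamelijk");
        // was: new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions)
        return new DutchAnalyzer(CharArraySet.EMPTY_SET, exclusions);
      }
    }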
@@ -114,14 +113,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { } public void testSnowballCorrectness() throws Exception { - Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new DutchAnalyzer(); checkOneTerm(a, "opheffen", "opheff"); checkOneTerm(a, "opheffende", "opheff"); checkOneTerm(a, "opheffing", "opheff"); } public void testReusableTokenStream() throws Exception { - Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new DutchAnalyzer(); checkOneTerm(a, "lichaamsziek", "lichaamsziek"); checkOneTerm(a, "lichamelijk", "licham"); checkOneTerm(a, "lichamelijke", "licham"); @@ -129,12 +128,12 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { } public void testExclusionTableViaCtor() throws IOException { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet( 1, true); set.add("lichamelijk"); - DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); - a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); + a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); } @@ -144,12 +143,12 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { * even if you use a non-default ctor. */ public void testStemOverrides() throws IOException { - DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET); checkOneTerm(a, "fiets", "fiets"); } public void testEmptyStemDictionary() throws IOException { - DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, + DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET, CharArrayMap.emptyMap()); checkOneTerm(a, "fiets", "fiet"); } @@ -158,17 +157,17 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { * Test that stopwords are not case sensitive */ public void testStopwordsCasing() throws IOException { - DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT); + DutchAnalyzer a = new DutchAnalyzer(); assertAnalyzesTo(a, "Zelf", new String[] { }); } private void check(final String input, final String expected) throws Exception { - checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected); + checkOneTerm(new DutchAnalyzer(), input, expected); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new DutchAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new DutchAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java index 98fb8f66964..f3900f87c90 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java @@ -27,12 +27,12 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new 
NorwegianAnalyzer(TEST_VERSION_CURRENT); + new NorwegianAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new NorwegianAnalyzer(); // stemming checkOneTerm(a, "havnedistriktene", "havnedistrikt"); checkOneTerm(a, "havnedistrikter", "havnedistrikt"); @@ -42,8 +42,8 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false); - Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT, + CharArraySet exclusionSet = new CharArraySet( asSet("havnedistriktene"), false); + Analyzer a = new NorwegianAnalyzer( NorwegianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "havnedistriktene", "havnedistriktene"); checkOneTerm(a, "havnedistrikter", "havnedistrikt"); @@ -51,6 +51,6 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new NorwegianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new NorwegianAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java index f5fa09f238e..278577381f8 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java @@ -66,7 +66,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("sekretæren"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java index c04ad4f8c01..a0dbc8671d6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java @@ -65,7 +65,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("sekretæren"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java index 402cf5c3579..4c5cce54950 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java @@ -27,12 +27,12 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new PortugueseAnalyzer(TEST_VERSION_CURRENT); + new PortugueseAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new PortugueseAnalyzer(); // stemming checkOneTerm(a, "quilométricas", "quilometric"); checkOneTerm(a, "quilométricos", "quilometric"); @@ -42,8 +42,8 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); - Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT, + CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false); + Analyzer a = new PortugueseAnalyzer( PortugueseAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "quilométricas", "quilométricas"); checkOneTerm(a, "quilométricos", "quilometric"); @@ -51,6 +51,6 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new PortugueseAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new PortugueseAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java index 02a20de016c..d04980f3eb8 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java @@ -92,7 +92,7 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java index 585993380ea..c7a4ebad289 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java @@ -66,7 +66,7 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java index 39a6c685bbc..24421d1f12d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java @@ -66,7 +66,7 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java index b0a6c24e69b..5322a3e561e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java @@ -64,7 +64,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { public void testNoStopwords() throws Exception { // Note: an empty list of fields passed in - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.emptyList(), 1); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Collections.emptyList(), 1); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "quick"); assertTokenStreamContents(protectedTokenStream, new String[]{"quick"}); @@ -73,13 +73,13 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { } public void testDefaultStopwordsAllFields() throws Exception { - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring } public void testStopwordsAllFieldsMaxPercentDocs() throws Exception { - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, 1f / 2f); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on terms in > one half of docs remove boring @@ -89,36 +89,36 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { // A filter on terms in > half of docs should not remove vaguelyBoring assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"}); - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, 1f / 4f); protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring"); // A filter on terms in > quarter of docs should remove vaguelyBoring assertTokenStreamContents(protectedTokenStream, new String[0]); } public void 
testStopwordsPerFieldMaxPercentDocs() throws Exception { - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on one Field should not affect queries on another assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f); protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on the right Field should affect queries on it assertTokenStreamContents(protectedTokenStream, new String[0]); } public void testStopwordsPerFieldMaxDocFreq() throws Exception { - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField"), 10); int numStopWords = protectedAnalyzer.getStopWords("repetitiveField").length; assertTrue("Should have identified stop words", numStopWords > 0); - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField", "variedField"), 10); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField", "variedField"), 10); int numNewStopWords = protectedAnalyzer.getStopWords("repetitiveField").length + protectedAnalyzer.getStopWords("variedField").length; assertTrue("Should have identified more stop words", numNewStopWords > numStopWords); } public void testNoFieldNamePollution() throws Exception { - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10); + protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField"), 10); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // Check filter set up OK @@ -131,7 +131,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { public void testTokenStream() throws Exception { QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer( - TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), reader, 10); TokenStream ts = a.tokenStream("repetitiveField", "this boring"); assertTokenStreamContents(ts, new String[] { "this" }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java index ac7d225cc7c..28f3c7c3fd2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.reverse; import java.io.IOException; -import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; @@ -27,69 +26,68 @@ import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.util.Version; public class TestReverseStringFilter extends BaseTokenStreamTestCase { public void testFilter() throws Exception { TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); // 1-4 length string ((Tokenizer)stream).setReader(new StringReader("Do have a nice day")); - ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream); + ReverseStringFilter filter = new ReverseStringFilter(stream); assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" }); } public void testFilterWithMark() throws Exception { TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); // 1-4 length string ((Tokenizer)stream).setReader(new StringReader("Do have a nice day")); - ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001'); + ReverseStringFilter filter = new ReverseStringFilter(stream, '\u0001'); assertTokenStreamContents(filter, new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" }); } public void testReverseString() throws Exception { - assertEquals( "A", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "A" ) ); - assertEquals( "BA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "AB" ) ); - assertEquals( "CBA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "ABC" ) ); + assertEquals( "A", ReverseStringFilter.reverse( "A" ) ); + assertEquals( "BA", ReverseStringFilter.reverse( "AB" ) ); + assertEquals( "CBA", ReverseStringFilter.reverse( "ABC" ) ); } public void testReverseChar() throws Exception { char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' }; - ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 2, 3 ); + ReverseStringFilter.reverse( buffer, 2, 3 ); assertEquals( "ABEDCF", new String( buffer ) ); } public void testReverseSupplementary() throws Exception { // supplementary at end - assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅")); + assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse("瀛愯䇹鍟艱𩬅")); // supplementary at end - 1 - assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅a")); + assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse("瀛愯䇹鍟艱𩬅a")); // supplementary at start - assertEquals("fedcba𩬅", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "𩬅abcdef")); + assertEquals("fedcba𩬅", ReverseStringFilter.reverse("𩬅abcdef")); // supplementary at start + 1 - assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "z𩬅abcdef")); + assertEquals("fedcba𩬅z", ReverseStringFilter.reverse("z𩬅abcdef")); // supplementary medial - assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "abcd𩬅efg")); + assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse("abcd𩬅efg")); } public void testReverseSupplementaryChar() throws Exception { // supplementary at end char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray(); - ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7); + ReverseStringFilter.reverse(buffer, 3, 7); assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer)); // supplementary at end - 1 buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray(); - ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8); + ReverseStringFilter.reverse(buffer, 3, 8); assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer)); // supplementary at start buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray(); - 
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7); + ReverseStringFilter.reverse(buffer, 3, 7); assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer)); // supplementary at start + 1 buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray(); - ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8); + ReverseStringFilter.reverse(buffer, 3, 8); assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer)); // supplementary medial buffer = "abc瀛愯𩬅def".toCharArray(); - ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7); + ReverseStringFilter.reverse(buffer, 3, 7); assertEquals("abcfed𩬅愯瀛", new String(buffer)); } @@ -99,7 +97,7 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer)); + return new TokenStreamComponents(tokenizer, new ReverseStringFilter(tokenizer)); } }; checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); @@ -110,7 +108,7 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); - return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer)); + return new TokenStreamComponents(tokenizer, new ReverseStringFilter(tokenizer)); } }; checkOneTerm(a, "", ""); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java index 03c96d096ff..7af63248732 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java @@ -27,12 +27,12 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new RomanianAnalyzer(TEST_VERSION_CURRENT); + new RomanianAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new RomanianAnalyzer(); // stemming checkOneTerm(a, "absenţa", "absenţ"); checkOneTerm(a, "absenţi", "absenţ"); @@ -42,8 +42,8 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false); - Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT, + CharArraySet exclusionSet = new CharArraySet( asSet("absenţa"), false); + Analyzer a = new RomanianAnalyzer( RomanianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "absenţa", "absenţa"); checkOneTerm(a, "absenţi", "absenţ"); @@ -51,6 +51,6 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new RomanianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new RomanianAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java index fbc683675e1..35dd3efc514 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test case for RussianAnalyzer. @@ -33,12 +32,12 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { /** Check that RussianAnalyzer doesnt discard any numbers */ public void testDigitsInRussianCharset() throws IOException { - RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT); + RussianAnalyzer ra = new RussianAnalyzer(); assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" }); } public void testReusableTokenStream() throws Exception { - Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new RussianAnalyzer(); assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" }); assertAnalyzesTo(a, "Но знание это хранилось в тайне", @@ -47,9 +46,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { public void testWithStemExclusionSet() throws Exception { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet( 1, true); set.add("представление"); - Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set); + Analyzer a = new RussianAnalyzer( RussianAnalyzer.getDefaultStopSet() , set); assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); @@ -57,6 +56,6 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new RussianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new RussianAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java index 9433c9599ab..1d4a381e9d0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java @@ -49,7 +49,7 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("энергии"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("энергии"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java index bf747bdd8dc..514ad9a3095 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java @@ -309,9 +309,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { Analyzer delegate = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "into"); + CharArraySet stopSet = StopFilter.makeStopSet("into"); Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - TokenFilter filter = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet); + TokenFilter filter = new StopFilter(tokenizer, stopSet); return new TokenStreamComponents(tokenizer, filter); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java index d407277dd27..83bca7dcc81 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java @@ -981,7 +981,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase { } public void testReset() throws Exception { - Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + Tokenizer wsTokenizer = new WhitespaceTokenizer(); wsTokenizer.setReader(new StringReader("please divide this sentence")); TokenStream filter = new ShingleFilter(wsTokenizer, 2); assertTokenStreamContents(filter, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java index addf2d8b20c..eeb81a46508 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java @@ -165,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"}); source1.reset(); - TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1); + TokenStream lowerCasing = new LowerCaseFilter(source1); String[] lowerCaseTokens = new String[tokens1.length]; for (int i = 0; i < tokens1.length; i++) lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT); @@ -173,7 +173,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { } private StandardTokenizer standardTokenizer(StringBuilder builder) throws IOException { - StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + StandardTokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(new StringReader(builder.toString())); return tokenizer; } @@ -191,10 +191,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' '); } //make sure we produce the same tokens - TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer))); + TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer))); TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100)); teeStream.consumeAllTokens(); - TokenStream stream = new 
ModuloTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)), 100); + TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100); CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class); CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class); for (int i=0; stream.incrementToken(); i++) { @@ -207,12 +207,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { int tfPos = 0; long start = System.currentTimeMillis(); for (int i = 0; i < 20; i++) { - stream = new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)); + stream = new StandardFilter(standardTokenizer(buffer)); PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { tfPos += posIncrAtt.getPositionIncrement(); } - stream = new ModuloTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)), modCounts[j]); + stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), modCounts[j]); posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { tfPos += posIncrAtt.getPositionIncrement(); @@ -224,7 +224,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { //simulate one field with one sink start = System.currentTimeMillis(); for (int i = 0; i < 20; i++) { - teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer))); + teeStream = new TeeSinkTokenFilter(new StandardFilter( standardTokenizer(buffer))); sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j])); PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class); while (teeStream.incrementToken()) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java index 75525559a71..6220f9ce5f9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java @@ -27,12 +27,12 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new SwedishAnalyzer(TEST_VERSION_CURRENT); + new SwedishAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new SwedishAnalyzer(); // stemming checkOneTerm(a, "jaktkarlarne", "jaktkarl"); checkOneTerm(a, "jaktkarlens", "jaktkarl"); @@ -42,8 +42,8 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false); - Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT, + CharArraySet exclusionSet = new CharArraySet( asSet("jaktkarlarne"), false); + Analyzer a = new SwedishAnalyzer( SwedishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "jaktkarlarne", "jaktkarlarne"); checkOneTerm(a, "jaktkarlens", "jaktkarl"); @@ -51,6 +51,6 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws 
Exception { - checkRandomData(random(), new SwedishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SwedishAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java index c25ade6b6a9..ef87a8e3a67 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java @@ -49,7 +49,7 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false); + final CharArraySet exclusionSet = new CharArraySet( asSet("jaktkarlens"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java index 59d85c26615..36897b7c5bd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java @@ -100,7 +100,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase { @Test(expected=ParseException.class) public void testInvalidPositionsInput() throws Exception { String testFile = "testola => the test"; - SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT)); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer()); parser.parse(new StringReader(testFile)); } @@ -108,7 +108,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase { @Test(expected=ParseException.class) public void testInvalidPositionsOutput() throws Exception { String testFile = "the test => testola"; - SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT)); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer()); parser.parse(new StringReader(testFile)); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index a7273ca4385..ab502f00cba 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -46,14 +46,14 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { * testcase for offsets */ public void testOffsets() throws Exception { - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี", + assertAnalyzesTo(new ThaiAnalyzer(CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี", new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, new int[] { 0, 3, 6, 9, 13, 17, 20, 23 }, new int[] { 3, 6, 9, 13, 17, 20, 23, 25 }); } public void testStopWords() throws Exception { - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี", + assertAnalyzesTo(new ThaiAnalyzer(), 
"การที่ได้ต้องแสดงว่างานดี", new String[] { "แสดง", "งาน", "ดี" }, new int[] { 13, 20, 23 }, new int[] { 17, 23, 25 }, @@ -65,7 +65,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { */ // note this test uses stopfilter's stopset public void testPositionIncrements() throws Exception { - final ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + final ThaiAnalyzer analyzer = new ThaiAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET); assertAnalyzesTo(analyzer, "การที่ได้ต้อง the แสดงว่างานดี", new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, new int[] { 0, 3, 6, 9, 18, 22, 25, 28 }, @@ -81,7 +81,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + ThaiAnalyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET); assertAnalyzesTo(analyzer, "", new String[] {}); assertAnalyzesTo( @@ -97,18 +97,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ThaiAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ThaiAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new ThaiAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } // LUCENE-3044 public void testAttributeReuse() throws Exception { - ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT); + ThaiAnalyzer analyzer = new ThaiAnalyzer(); // just consume TokenStream ts = analyzer.tokenStream("dummy", "ภาษาไทย"); assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); @@ -119,7 +119,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { } public void testTwoSentences() throws Exception { - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "This is a test. การที่ได้ต้องแสดงว่างานดี", + assertAnalyzesTo(new ThaiAnalyzer(CharArraySet.EMPTY_SET), "This is a test. 
การที่ได้ต้องแสดงว่างานดี", new String[] { "this", "is", "a", "test", "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, new int[] { 0, 5, 8, 10, 16, 19, 22, 25, 29, 33, 36, 39 }, new int[] { 4, 7, 9, 14, 19, 22, 25, 29, 33, 36, 39, 41 }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java index 85bd371e73b..bc40ed5bad8 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java @@ -27,12 +27,12 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new TurkishAnalyzer(TEST_VERSION_CURRENT); + new TurkishAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new TurkishAnalyzer(); // stemming checkOneTerm(a, "ağacı", "ağaç"); checkOneTerm(a, "ağaç", "ağaç"); @@ -45,15 +45,14 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false); - Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT, - TurkishAnalyzer.getDefaultStopSet(), exclusionSet); + CharArraySet exclusionSet = new CharArraySet(asSet("ağacı"), false); + Analyzer a = new TurkishAnalyzer(TurkishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "ağacı", "ağacı"); checkOneTerm(a, "ağaç", "ağaç"); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new TurkishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new TurkishAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java index 9c137c21372..fdc830fb2f5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java @@ -25,7 +25,7 @@ import org.apache.lucene.util.LuceneTestCase; public class TestCharArrayMap extends LuceneTestCase { public void doRandom(int iter, boolean ignoreCase) { - CharArrayMap map = new CharArrayMap<>(TEST_VERSION_CURRENT, 1, ignoreCase); + CharArrayMap map = new CharArrayMap<>(1, ignoreCase); HashMap hmap = new HashMap<>(); char[] key; @@ -64,7 +64,7 @@ public class TestCharArrayMap extends LuceneTestCase { } public void testMethods() { - CharArrayMap cm = new CharArrayMap<>(TEST_VERSION_CURRENT, 2, false); + CharArrayMap cm = new CharArrayMap<>(2, false); HashMap hm = new HashMap<>(); hm.put("foo",1); hm.put("bar",2); @@ -133,7 +133,7 @@ public class TestCharArrayMap extends LuceneTestCase { } public void testModifyOnUnmodifiable(){ - CharArrayMap map = new CharArrayMap<>(TEST_VERSION_CURRENT, 2, false); + CharArrayMap map = new CharArrayMap<>(2, false); map.put("foo",1); map.put("bar",2); final int size = map.size(); @@ -230,7 +230,7 @@ public class TestCharArrayMap extends LuceneTestCase { } public void testToString() { - CharArrayMap cm 
= new CharArrayMap<>(TEST_VERSION_CURRENT, Collections.singletonMap("test",1), false); + CharArrayMap cm = new CharArrayMap<>(Collections.singletonMap("test",1), false); assertEquals("[test]",cm.keySet().toString()); assertEquals("[1]",cm.values().toString()); assertEquals("[test=1]",cm.entrySet().toString()); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java index 9af7447588e..57e9396347a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.util; import java.util.*; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.Version; public class TestCharArraySet extends LuceneTestCase { @@ -35,7 +34,7 @@ public class TestCharArraySet extends LuceneTestCase { public void testRehash() throws Exception { - CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true); + CharArraySet cas = new CharArraySet(0, true); for(int i=0;i would not hit any element of the CAS and therefor never call // remove() on the iterator try{ - set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true)); + set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected @@ -152,7 +151,7 @@ public class TestCharArraySet extends LuceneTestCase { } try{ - set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true)); + set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected @@ -173,7 +172,7 @@ public class TestCharArraySet extends LuceneTestCase { } public void testUnmodifiableSet(){ - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true); + CharArraySet set = new CharArraySet(10,true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); set.add(Integer.valueOf(1)); final int size = set.size(); @@ -203,7 +202,7 @@ public class TestCharArraySet extends LuceneTestCase { "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"}; String[] lowerArr = new String[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"}; - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true); + CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); } @@ -211,7 +210,7 @@ public class TestCharArraySet extends LuceneTestCase { assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i])); } - set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); + set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } @@ -229,7 +228,7 @@ public class TestCharArraySet extends LuceneTestCase { String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" }; - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays + CharArraySet set = new CharArraySet(Arrays .asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); @@ -238,7 +237,7 @@ public class TestCharArraySet extends 
LuceneTestCase { assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i])); } - set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), + set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); @@ -252,8 +251,8 @@ public class TestCharArraySet extends LuceneTestCase { @SuppressWarnings("deprecated") public void testCopyCharArraySetBWCompat() { - CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); - CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); + CharArraySet setIngoreCase = new CharArraySet(10, true); + CharArraySet setCaseSensitive = new CharArraySet(10, false); List stopwords = Arrays.asList(TEST_STOP_WORDS); List stopwordsUpper = new ArrayList<>(); @@ -265,8 +264,8 @@ public class TestCharArraySet extends LuceneTestCase { setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS)); setCaseSensitive.add(Integer.valueOf(1)); - CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase); - CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive); + CharArraySet copy = CharArraySet.copy(setIngoreCase); + CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive); assertEquals(setIngoreCase.size(), copy.size()); assertEquals(setCaseSensitive.size(), copy.size()); @@ -299,8 +298,8 @@ public class TestCharArraySet extends LuceneTestCase { * Test the static #copy() function with a CharArraySet as a source */ public void testCopyCharArraySet() { - CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); - CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); + CharArraySet setIngoreCase = new CharArraySet(10, true); + CharArraySet setCaseSensitive = new CharArraySet(10, false); List stopwords = Arrays.asList(TEST_STOP_WORDS); List stopwordsUpper = new ArrayList<>(); @@ -312,8 +311,8 @@ public class TestCharArraySet extends LuceneTestCase { setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS)); setCaseSensitive.add(Integer.valueOf(1)); - CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase); - CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive); + CharArraySet copy = CharArraySet.copy(setIngoreCase); + CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive); assertEquals(setIngoreCase.size(), copy.size()); assertEquals(setCaseSensitive.size(), copy.size()); @@ -355,7 +354,7 @@ public class TestCharArraySet extends LuceneTestCase { } set.addAll(Arrays.asList(TEST_STOP_WORDS)); - CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set); + CharArraySet copy = CharArraySet.copy(set); assertEquals(set.size(), copy.size()); assertEquals(set.size(), copy.size()); @@ -380,12 +379,12 @@ public class TestCharArraySet extends LuceneTestCase { } /** - * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the + * Tests a special case of {@link CharArraySet#copy(Set)} where the * set to copy is the {@link CharArraySet#EMPTY_SET} */ public void testCopyEmptySet() { assertSame(CharArraySet.EMPTY_SET, - CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET)); + CharArraySet.copy(CharArraySet.EMPTY_SET)); } /** @@ -408,7 +407,7 @@ public class TestCharArraySet extends LuceneTestCase { * Test for NPE */ public void testContainsWithNull() { - CharArraySet set = new 
CharArraySet(TEST_VERSION_CURRENT, 1, true); + CharArraySet set = new CharArraySet(1, true); try { set.contains((char[]) null, 0, 10); fail("null value must raise NPE"); @@ -424,7 +423,7 @@ public class TestCharArraySet extends LuceneTestCase { } public void testToString() { - CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test")); + CharArraySet set = CharArraySet.copy(Collections.singleton("test")); assertEquals("[test]", set.toString()); set.add("test2"); assertTrue(set.toString().contains(", ")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java index a470c9fefaa..0ed68dab40b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java @@ -52,7 +52,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { } // internal buffer size is 1024 make sure we have a surrogate pair right at the border builder.insert(1023, "\ud801\udc1c"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString())); assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" ")); } @@ -70,7 +70,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { builder.append("a"); } builder.append("\ud801\udc1cabc"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)}); } @@ -85,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { for (int i = 0; i < 255; i++) { builder.append("A"); } - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } @@ -100,7 +100,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { builder.append("A"); } builder.append("\ud801\udc1c"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } @@ -110,7 +110,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) { + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) { @Override protected int normalize(int c) { if (c > 0xffff) { @@ -148,7 +148,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() 
{ @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) { + Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) { @Override protected int normalize(int c) { if (c <= 0xffff) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java index f31f913a332..b39b4cda73b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java @@ -46,7 +46,7 @@ public class TestCharacterUtils extends LuceneTestCase { } catch (IndexOutOfBoundsException e) { } - CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils java5 = CharacterUtils.getInstance(); assertEquals((int) 'A', java5.codePointAt(cpAt3, 0)); assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt( cpAt3, 3)); @@ -68,7 +68,7 @@ public class TestCharacterUtils extends LuceneTestCase { assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3, 5)); assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3, 4)); - CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils java5 = CharacterUtils.getInstance(); assertEquals((int) 'A', java5.codePointAt(cpAt3, 0, 2)); assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt( cpAt3, 3, 5)); @@ -78,7 +78,7 @@ public class TestCharacterUtils extends LuceneTestCase { @Test public void testCodePointCount() { CharacterUtils java4 = CharacterUtils.getJava4Instance(); - CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils java5 = CharacterUtils.getInstance(); final String s = TestUtil.randomUnicodeString(random()); assertEquals(s.length(), java4.codePointCount(s)); assertEquals(Character.codePointCount(s, 0, s.length()), java5.codePointCount(s)); @@ -87,7 +87,7 @@ public class TestCharacterUtils extends LuceneTestCase { @Test public void testOffsetByCodePoint() { CharacterUtils java4 = CharacterUtils.getJava4Instance(); - CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils java5 = CharacterUtils.getInstance(); for (int i = 0; i < 10; ++i) { final char[] s = TestUtil.randomUnicodeString(random()).toCharArray(); final int index = TestUtil.nextInt(random(), 0, s.length); @@ -119,7 +119,7 @@ public class TestCharacterUtils extends LuceneTestCase { public void testConversions() { CharacterUtils java4 = CharacterUtils.getJava4Instance(); - CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils java5 = CharacterUtils.getInstance(); testConversions(java4); testConversions(java5); } @@ -159,7 +159,7 @@ public class TestCharacterUtils extends LuceneTestCase { @Test public void testFillNoHighSurrogate() throws IOException { CharacterUtils versions[] = new CharacterUtils[] { - CharacterUtils.getInstance(TEST_VERSION_CURRENT), + CharacterUtils.getInstance(), CharacterUtils.getJava4Instance() }; for (CharacterUtils instance : versions) { Reader reader = new StringReader("helloworld"); @@ -181,7 +181,7 @@ public class TestCharacterUtils extends LuceneTestCase { @Test public void testFillJava15() throws IOException { String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801"; - CharacterUtils instance = 
CharacterUtils.getInstance(TEST_VERSION_CURRENT); + CharacterUtils instance = CharacterUtils.getInstance(); Reader reader = new StringReader(input); CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5); assertTrue(instance.fill(buffer, reader)); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java index 788eb373405..61822839211 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java @@ -40,9 +40,9 @@ public class TestElision extends BaseTokenStreamTestCase { public void testElision() throws Exception { String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin."; - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); + Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory()); tokenizer.setReader(new StringReader(test)); - CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false); + CharArraySet articles = new CharArraySet(asSet("l", "M"), false); TokenFilter filter = new ElisionFilter(tokenizer, articles); List tas = filter(filter); assertEquals("embrouille", tas.get(4)); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java index 24515512cd5..2c5f9084b07 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java @@ -50,8 +50,7 @@ public class TestFilesystemResourceLoader extends LuceneTestCase { private void assertClasspathDelegation(ResourceLoader rl) throws Exception { // try a stopwords file from classpath CharArraySet set = WordlistLoader.getSnowballWordSet( - new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8), - TEST_VERSION_CURRENT + new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8) ); assertTrue(set.contains("you")); // try to load a class; we use string comparison because classloader may be different... 
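
Every hunk above and below applies the same mechanical migration: analyzers, token filters, tokenizers, and support classes such as CharArraySet stop taking a Version (matchVersion) first argument, so call sites simply drop it. A minimal before/after sketch of that pattern, using StandardAnalyzer and CharArraySet as representatives — the demo class name is ours, and the commented-out "before" lines assume the pre-change constructors:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

public class VersionlessConstructorsDemo {
  public static void main(String[] args) throws Exception {
    // Before: a Version was threaded through every component, e.g.
    //   Analyzer a = new StandardAnalyzer(Version.LUCENE_4_9);
    //   CharArraySet stops = new CharArraySet(Version.LUCENE_4_9, 16, true);
    // After this change the Version parameter is gone from the signatures:
    Analyzer a = new StandardAnalyzer();             // default stop set, no Version
    CharArraySet stops = new CharArraySet(16, true); // initial capacity 16, ignore case
    stops.add("the");
    System.out.println(stops.contains("the"));       // prints: true
    a.close();
  }
}

The same shape recurs in every test file touched here: the TEST_VERSION_CURRENT argument is deleted and the remaining arguments shift left, with no behavioral change intended.
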
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java index ac33b30dc8d..bd2ebdba71f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java @@ -29,15 +29,15 @@ public class TestWordlistLoader extends LuceneTestCase { public void testWordlistLoading() throws IOException { String s = "ONE\n two \nthree"; - CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT); + CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s)); checkSet(wordSet1); - CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)), TEST_VERSION_CURRENT); + CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s))); checkSet(wordSet2); } public void testComments() throws Exception { String s = "ONE\n two \nthree\n#comment"; - CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#", TEST_VERSION_CURRENT); + CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#"); checkSet(wordSet1); assertFalse(wordSet1.contains("#comment")); assertFalse(wordSet1.contains("comment")); @@ -66,7 +66,7 @@ public class TestWordlistLoader extends LuceneTestCase { " two \n" + // stopword with leading/trailing space " three four five \n" + // multiple stopwords "six seven | comment\n"; //multiple stopwords + comment - CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s), TEST_VERSION_CURRENT); + CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s)); assertEquals(7, wordset.size()); assertTrue(wordset.contains("ONE")); assertTrue(wordset.contains("two")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java index a3547f2bc07..3e1abb36435 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java @@ -35,7 +35,7 @@ public class TestCollationKeyAnalyzer extends CollationTestBase { // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi // characters properly. 
private Collator collator = Collator.getInstance(new Locale("ar")); - private Analyzer analyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator); + private Analyzer analyzer = new CollationKeyAnalyzer(collator); private BytesRef firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); private BytesRef firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray()); @@ -65,7 +65,7 @@ public class TestCollationKeyAnalyzer extends CollationTestBase { for (int i = 0; i < iters; i++) { Collator collator = Collator.getInstance(Locale.GERMAN); collator.setStrength(Collator.PRIMARY); - assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator)); + assertThreadSafe(new CollationKeyAnalyzer(collator)); } } } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java index 3e8bfa32824..af5ba0ff21d 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java @@ -77,7 +77,7 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)); - Tokenizer tokenStream = new NGramTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), 1, 1); + Tokenizer tokenStream = new NGramTokenizer(newAttributeFactory(), 1, 1); tokenStream.setReader(reader); assertTokenStreamContents(tokenStream, diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java index 55dd9466fe7..a4e5c3c3847 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.icu.segmentation; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -43,7 +42,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); TokenStream result = new CJKBigramFilter(source); - return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET)); + return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET)); } }; @@ -61,7 +60,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { // some halfwidth katakana forms, which will affect the bigramming. 
TokenStream result = new ICUNormalizer2Filter(source); result = new CJKBigramFilter(source); - return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET)); + return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET)); } }; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java index f738e4a10b3..fe3dc2e6bb6 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ja; */ import java.io.IOException; -import java.io.Reader; import java.util.HashSet; import java.util.Set; @@ -31,7 +30,6 @@ import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; import org.apache.lucene.analysis.ja.dict.UserDictionary; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; -import org.apache.lucene.util.Version; /** * Analyzer for Japanese that uses morphological analysis. @@ -42,12 +40,12 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase { private final Set stoptags; private final UserDictionary userDict; - public JapaneseAnalyzer(Version matchVersion) { - this(matchVersion, null, JapaneseTokenizer.DEFAULT_MODE, DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS); + public JapaneseAnalyzer() { + this(null, JapaneseTokenizer.DEFAULT_MODE, DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS); } - public JapaneseAnalyzer(Version matchVersion, UserDictionary userDict, Mode mode, CharArraySet stopwords, Set stoptags) { - super(matchVersion, stopwords); + public JapaneseAnalyzer(UserDictionary userDict, Mode mode, CharArraySet stopwords, Set stoptags) { + super(stopwords); this.userDict = userDict; this.mode = mode; this.stoptags = stoptags; @@ -89,11 +87,11 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new JapaneseTokenizer(userDict, true, mode); TokenStream stream = new JapaneseBaseFormFilter(tokenizer); - stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags); + stream = new JapanesePartOfSpeechStopFilter(stream, stoptags); stream = new CJKWidthFilter(stream); - stream = new StopFilter(matchVersion, stream, stopwords); + stream = new StopFilter(stream, stopwords); stream = new JapaneseKatakanaStemFilter(stream); - stream = new LowerCaseFilter(matchVersion, stream); + stream = new LowerCaseFilter(stream); return new TokenStreamComponents(tokenizer, stream); } } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java index 476a723abe1..11bb4e67e62 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java @@ -22,7 +22,6 @@ import java.util.Set; import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.util.Version; /** * 
Removes tokens that match a set of part-of-speech tags. @@ -33,12 +32,11 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter { /** * Create a new {@link JapanesePartOfSpeechStopFilter}. - * @param version the Lucene match version * @param input the {@link TokenStream} to consume * @param stopTags the part-of-speech tags that should be removed */ - public JapanesePartOfSpeechStopFilter(Version version, TokenStream input, Set stopTags) { - super(version, input); + public JapanesePartOfSpeechStopFilter(TokenStream input, Set stopTags) { + super(input); this.stopTags = stopTags; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java index 18cc27a36da..3e9365ac3ac 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java @@ -70,7 +70,7 @@ public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory im public TokenStream create(TokenStream stream) { // if stoptags is null, it means the file is empty if (stopTags != null) { - final TokenStream filter = new JapanesePartOfSpeechStopFilter(luceneMatchVersion, stream, stopTags); + final TokenStream filter = new JapanesePartOfSpeechStopFilter(stream, stopTags); return filter; } else { return stream; diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java index 65e55aac50e..90cc00e83e1 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java @@ -33,7 +33,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new JapaneseAnalyzer(TEST_VERSION_CURRENT); + new JapaneseAnalyzer(); } /** @@ -42,7 +42,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { * and offsets are correct. 
*/ public void testBasics() throws IOException { - assertAnalyzesTo(new JapaneseAnalyzer(TEST_VERSION_CURRENT), "多くの学生が試験に落ちた。", + assertAnalyzesTo(new JapaneseAnalyzer(), "多くの学生が試験に落ちた。", new String[] { "多く", "学生", "試験", "落ちる" }, new int[] { 0, 3, 6, 9 }, new int[] { 2, 5, 8, 11 }, @@ -55,7 +55,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { */ public void testDecomposition() throws IOException { - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); @@ -110,7 +110,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { ); // Kyoto University Baseball Club - assertAnalyzesToPositions(new JapaneseAnalyzer(TEST_VERSION_CURRENT), "京都大学硬式野球部", + assertAnalyzesToPositions(new JapaneseAnalyzer(), "京都大学硬式野球部", new String[] { "京都大", "学", "硬式", @@ -127,7 +127,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { */ public void testRandom() throws IOException { Random random = random(); - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkRandomData(random, a, atLeast(10000)); @@ -136,7 +136,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 8192); @@ -146,7 +146,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { // user dict to analyzer works: public void testUserDict3() throws Exception { // Test entry that breaks into multiple tokens: - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, TestJapaneseTokenizer.readDict(), + final Analyzer a = new JapaneseAnalyzer(TestJapaneseTokenizer.readDict(), Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); @@ -163,7 +163,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void testCuriousString() throws Exception { Random random = random(); final String s = "<li>06:26 2004年3月21日 [[利用者:Kzhr|Kzhr]] "お菓子な家族" を削除しました <em><nowiki>(即時削除: 悪戯。内容: 
&#39;KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK&#39;)</nowiki></em></li>"; - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -174,7 +174,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void testAnotherCuriousString() throws Exception { Random random = random(); final String s = "《〔〘〝」〩〄〯』〴〷〦〯〹】〰。 〆。〡〢〲〆〤〫〱 〜々〲〿〄》〃】〚〗〪〓〨々〮〹〟〯〫』」〨〒〜〃〃〡 〸〜〱〆〿「〱〳。〷〆〃〷〇〛〥〒〖〪〕〦〚〉〷〼〣〒。〕〣〻〒〻〼〔〸〿〖〖〆々〭《〟〚〇〕〸〲〄〿〙」〞〖〪〬〤【〵〘〃々〦〮〠〦〛〲〝〿〽〓〺〷〛》〛『》〇 〽〄〱〙〥〠』〨〉〨〔」》〮〥〽〔〰〄〶】〠〶〨〔々『。〞〙〮》【 〯〦〯〩〩〈〿〫〘〒》』〾〰〰〼〒「〝〰〱〞〹〔〪〭、〬〴【』〧〩】〈。〧〤〢〨〶〄〴〡。〪〭〞〷〣〘〳〄〬〙『 「」【〮〯〔〱〬〴〵〭〬〚〱、〚〣、〚〓〮、〚々】〼〿〦〫〛〲〆〕々。〨〩〇〫〵『『〣〮〜〫〃】〡〯』〆〫〺〻〬〺、〗】〓〕〶〇〞〬。」〃〮〇〞〷〰〲】〆〻。〬〻〄〜〃〲〺〧〘〇〈、〃〚〇〉「〬〣〨〮〆〴〻〒〖〄〒〳〗〶、〙「 〫〚《〩〆〱〡【〶』【〆〫】〢》〔。〵〴〽々〱〖〳〶〱《〈〒』『〝〘【〈〢〝〠〣「〤〆〢〈〚〕〿〣々〢〹〉〡 〷《〤〴『々〉〤〬《』々〾〔〚〆〔〴〪〩〸〦』〉〃 《〼〇〆〾〛〿」〧〝〽〘〠〻【〰〨〥《〯〝〩〩〱〇〳々〚〉〔『〹〳〳』〲『〣」〯〓【々〮〥〃〿〳〞〦〦〶〓〬〛〬〈〈〠『〜〥〒〯〜〜〹〲【〓〪《々〗〚〇〜〄〦『々〃〒〇〖〢〉〹〮〩〽『》〵〔】〣〮】〧、〇〰〒】《〈〆々〾〣【〾〲〘〧『〇〲〼〕〙「〪〆〚々〦〯〵〇〤〆〡」〪》〼』〴〶〪】『〲〢〭〬〈〠〮〽〓〔〧〖」〃〴〬』〣〝〯〣〴『〉〖〄〇〄〰〇〃〤、〤》〔〴〯〫〠〝〷〞〩〛〛〳々〓〟〜〛〜〃 〃〛「、』》》々〢〱〢〸〹〙〃〶〇〮〼」〔〶【〙〮々〣 〵〱〈〡〙〹、〶〘【〘〄〔『〸〵〫〱〈〙〜〸〩〗〷》〽〃〔〕〡〨〆〺〒〧〴〢〈〯〶〼〚〈〪〘〢〘〶〿〾〹〆〉」〠〴〭〉〡〮〫〸〸〦〟〣」〩〶』《〔〨〫〉〃〚〈〡〾〈〵【〼《〴〸〜〜〓《〡〶〫〉〫〼〱〿〢々〩〡〘〓〛〞〖々〢〩「々〦〣】〤〫〼〚〴〡〠〕〴〭。〟「〞》』「、〛〕〤々〈〺〃〸】〶〽〒〓〙》〶〬〸〧〜〲〬〰〪。〞〒【〭〇〢〝〧〰〹〾》〖「〹」〶〕〜〘〿〩〙〺〡〓〆〵〪〬〨〷〯〃】〤〤〞〸》〈〹〖〲〣〬〲〯〗〉〮「〼〨〓々。〭〆〶〩【〦〿》〩〻〢〔〤〟〯【〷〻〚〟」〗《〓〛。〰〃〭〯〘〣》〩〩〆」【〼〡】〳〿〫〳〼〺〶『〟〧』〳〲〔『〦「〳〃〫〷《〟〶〻〪〆〗〲〮〄〨〻』〟〜〓〣〴〓〉、〷〄〝〭〻〲〽〼〥〒〚〬〙〦〓〢〦〒〄。〛〩〿〹「〶〬〖〬〾〭〽〕〲〤〕〚〢〪〸〠〸〠〓〇〄〽〖】〵〮〦〲〸〉〫〢〹〼〗〱〮〢」〝〽〹「〭〥「〠〆〕〃〫々【『〣〝々〧〒〒】〬〖〘〗〰〭〢〠〨〖〶〒》〪〺〇〡》〦〝〾〴〸〓〛〟〞」〓〜。〡』々》〃〼』〨〾】〜〵々〥【〉〾〭〹〯〔〢〺〳〹〜〢〄〵〵〱。〯〹〺〣〭〉〛々〧〫々〛〪。〠〰〖〒〦〠〩〣〾〺〫〬、》「〚〫〲〸〶〧〞〯〨」】【〚〲『〽〡》〘〣〒〕〸『〼〘〿〘〽〤〿〶〫〆〾〔〃〱〫〱〧、 〒〰。〜〸〇〜〔〉〡〬〿〝〼〉〷、〠〘〉』〥〫〧〕》》〡〻〨〲〔〠〮】〰〮」〧〬《〦〼〽〵〭「〷〮〈〴〔〭、〣〔〥〱〔」〄〘》〡〣》〴〙〜〖〬〺〯々〟〗〥〥【〝〨〝〽〼〚。〙』〤〬〞〜〣〮〬〳〽〦〩 〶」〠〄〳〠〇〜〒〶〱々〠『〡』〭〰》〴〉〫〬〒《〽『〉〳〵〄〨〮〔〭〞』〡〚〩〦、〠【〓〯〬〦〛〽〉〜〻『〗〫〞〩〃〼〿〡〕〯 〸』》〼〮〆」〼〪〇〭〣〗〓〻〧」〙〳〱〥〳、〓〕〮〫》〧〃《〣』〹〬〣〶〡〾〙〮〕〶〧《 〨〇〺〳〉《《〕〜〰〱〕〛「〞〩 〓〢〄〣〼〢〽〇〛〟〖〘〳〤〫〡〫〬〦〘〪〶〝《』〜〕〝】〄〡〳〹々〯【〝〝〇〔〹〿〥〄〚〒〻『〺〮〇〲〒〾〙〞〉】〉〪〫〴〒〔〨〮〰〻〷〿〥〮〼〹〩〱〞〈〴〦〰〞【〻〾、〵〻〛〮、〻《〘〱〫〾〄〄〙、〔〖〘 》〻〧〦〃〣〬〰〗】〸〵。〄。〷〄〸〟〰〓、【〖〰〢〾〘〆》〜〶〻》〔〛』〦〩〷〴〃〴〫〱《「〖々〖〒〡〞。〱〡〖〤〫〇〜〒〴〯》〪〶》〘〨》》【〵〹」〤〯《〦〶〯〃〧〙〩」「〤】》々〣〱〯〞〰〢々〵〷〺〾〺〜〜〚〣〿〩〰《〄『〧《〜 〷〓〺〦々〚〨「〧〮《〥〸〞【〡〩〩〱〴〗〙〿『〇〭〖 
〹〥〲『〗〛〯〷〃〽〝《〳《〡】〄々〱〆〯〦。〒『〡》〨〃〦」〬〄〬〔〭〫〼〲】』〗〔〼〴〹〠」〺〬〺〔々》〾〿「〺〖〤》〴〶〣〚〒【〤〄】〹〺〟〃〜、】〪〚〯〢〹、〶〖〭】〾〠」〉〆〾々〯〈〙〞〶〩】〺〟〫〽〫〸〵〛〙〃「〰〫〓』〻「〦〤〖〺〇。〨〟 〦〙〘〨〸〒〣〈〩〜〧〾〒〕〤 〇〴〮〝〈〿〢〴〟〷〭〴】〽〇〟〦〬〶〲。〫〸〮〝〆〸〄〣〦〲〢〇〫〻〹〕〶〥〖。〨〬【〥〽〓〵〯、〒〉〳〘〧〼〆〹〉〾〬〽】〹〲《〜〨〟〡〪〱〃〓〬〜〧〝〸、〢〝〦』〝〸》】〩〡〉〫〛〇〢〖〔〠〹〧〕〨〃〙「〲〗〙『〛。。《〸〔〾〧〉〠』〡〼〄〨〲〥〼〠〻 「〸〩〟〷【〮〜〧〿〾〜〈。〣〰〪〘〮〴〨【〩〜〟〟〼〻〦〝「〺〝〄〵〝〲〃〨〺〫〜 〮」〡』〜〿、〪々〕〫〃〒〔〛〻〲〹》【〚〣〯《〢〙〕〝〾〙〭〄〕〗〄〪〵〃〘〺〻〤〟〢〻〆〥〝】〠〬〧〾〮々〪〓』〷』〿〕〒〽、〷〉」〨〨 〄〽〾「〧〴〜〢〮〚〆〣《〬〺〟〥〼〛〆〓〚々〇々〈〉〗〨〳々〣〭〯〉【〩〮〺〪』〭〚〉〦、〃〘〦〮」〴〆〴〔〴〜「〠〴【〰「〫〳〟〾〶〉〨〲〚〩〷〄《〄〝〈。〧〟〳〃〹々〃〄〭〬〰、〥〬〸〱〉〩〴《〔【〠〳〪〧〫〽〓〭】〧」〮〒〸〤。〩』〭〖〛〭〯〨〕〞〮〞〬〹〺々〽〡〷〪〶「〹〯〝々〭〠〼〰「〒〉」」〡〆〜〾〪〾》〇〙『〚〿〽】〛〮〶〚」《〔〔〣、〄〗〩〭〠」〠〰〞《〸〧〺〰」『〾〯〃〓〓〩〣〚【〜〭 〝〨〗〷〒《〫〝〶〘〣〿〜〱〾〨〥〘〃〳〆〇〈〜〲〪〡〶〭〤『〝〖〷〦〾〬〟〠〳〻、」【〣『〺〞〴〳「〵〺〨々〩〰〢〧〣〃『〹」〉〓〘〦〣〄〕〞〵〧〜」〴〠〱》〮〬〄〶〆〬」〘 〺。〲。 〾〷〕〛〣〾〗、〭』〭〧〝。〮」々『〻〒〣「〳〩〪〝〒〥〻〘〰〼〭〆〷〭「」〚〔〬〃〝〮〩〪〽〱。〯〯〰〨〿〷「々》【〴〧〻〰、〶〡〹〩〡〺〲〼。〩〿〯』〟〴〼〦〤〙〢〩〔〲〆〗〲《〟〤〬〷〧〫〧〗〞〣〚〚〧〭〮〛〲〮々〩〩〕〬々 〥〸、〢。〿〵〺〤〲〝 〥》々〰」〮〩〛〛』々『〹〞〃〃々〚【〱。。〹〨〿〻〣〞〨〈〤〼〃〻〩〶『 〲〷〗〭〓〯〯〝〃〾〕〻〖〱《「〹〣〦 」〵〄〮〚〖〞〪〼〖〙〵》〰〃〘。【〺〖〄〪〝〭〆〬〚〬〨〽」〕」「〜〤〯〷〇〝〠〆〫〼」〭〤〓〔々〆〵〷〪〭「〆〖〇〽〄〄「〿〵〷〤〿〮〫〻〢〕〝〪〳〸〘〡〡〞〮〻】「〝〷〘〾〒〺〉〨〰』〳〓〃〒〪【〗〯「〧々〷〩〝』〭〇〒、〯〈〦〣〆〬〸〚〈〉〔〥《々》〹〢〺〤〝々《 〲〘】〚』〚『〯〼〾〱〵〻、〪〟〸〯〽〴〱。〵〪〫々〳〢〣〕〓〩「〘〜〨〻】〿〹〭〛〛〔〹〻。〛〴〤〢〮、〸〷〃〜〜〝〔』〳〮〹』〽 〶〛 〤」〢。〣〖〶〯〥》〢〸〸〤〕〣〘》〧〦」〘〻〶〾〮〢〳〝〙〻〦〺〇〲〢〔〘〶〩〖】〟〓〰〇〮』〦〄々〹〻〄〄〽〷〱〫〒〛〉〿〓〯〺〪〲〢〼、〫〬「〩《〡〕〻』〭〜〗〫】 、〈〙〉【〓〣〫〜〈『〾】〴〪〫〬〶〪〚〬〿〪〮〴〒〶〡〄〉〿〼〜〵』〻〼〢「〵。』〸〖〙〧』〾〖〙《〉〪〦〙〔〈 〤〫〦〸『〗「〣『〓『〡〨〖〥〭《〢〠〦〞〸〞〚〢〕〙〖〾、〩」「〗〈〰〸〤〴〶〤〙《々〆〽〆【『〬〝〸『〙、〪〻』〓〹々〥〲〉〪〹〫〓〽〪〩〷、〹〺〩「〞『】〡々〡「〇〉〺〶〾〔々、〾〻〪〣〖〡〩〥〾〯】〤〰》『〲【〙〭〽〛〿々〟 〢〃〼〕〫〲』〪【〛〯】〔〕〥』」〳』《〖〥〳〄〢【〩〮〫〥〝〯〿〟、〣〹〪〔〱」〖〢〘〛〾〾〜〒〝〷〚〳〣〝〟《】「》〻『〢〄〄 》〱〓〞〛〢〆〺〉《〃〭〙〻〞〷〩〹〥〦〫〞〄〇〯〽〱〼〴〾〕〸〿〱〪〨〟〠々〪〸〔〵〆」〔〖〴〝〟】《〥 』《〒〄〣〿〞》】〃〹〲〛〬。】〒〓〹〴〿〥〴〲〖〧〝〪〶「〕〔〞〜〸〬〒〽【〸〻〢【〱」〪〉 〉〘〪〻〴〞』〯〰〾〥〓〼〻〕』〠〃〟〩〛〔【〻〡「〘〔 〲々〻〚〈〪〱〾〷〗》〯〞【〩 『〕〪〈々〞〞〳〘〵〃〼〨々〇〞〈〹〧〢〃〢〮〆〈〤〘〬〟〽〩、。〲々〺〠〳〸 〸〹〥、〯〒〈〃〠〰〙〪〯〬〖」〔〹〔〘〶〾〨〿〛〈〡〯〕〶〲、〷【》〷〆》〄《】〒〓〔〼〉〒〢〄〢〓〩〰〃〔。〵〙』。〷〼〩」〒〒〇〳〆〘〯「〢〠】〱〱《〤〽〢〄〤〵〪」〆〘〲〪〼〷〕〚〙〢〳〲〦〥〃〩〳〤「〽〽〇〖〶〶〾〴〰〷〨『〟〲〬〵〲〸〩〕〣〫】〝〇〡〿〳〦【〧〖〓〫〿〣〖【〙【〵々〶『〵〟〠〇》』〲〹〾〰〰〙〚〖〳〞〄『〤〠〇、。〆〧 〒〘〱〾〢〲〵〇〼〼〪〤〵〓〴〦〵〛『〘』〭〔〯「〓」〤〼〱〒〤〶〰〖〬〻【〳〵〡〃〙〠〩〛〝〰》〸《』〦 〿〭〵〺〈〓〵〛【〴〤〒。〪〷〢〡』〒 〄〚々〽〄〔〖々。〪〠〢〸〮〵〾「〉〙〆〘〣《〩〽〃〄「〕〢〻〉〷〛〫〇〪〯〵《〷〚〕〇〟〔〛『〣〆』〸〶々〳〾《〭〯〫〄〔〗〨〺〛〴》〻〫〨〢〜〱〇〦〘〺〉〫〇〧〿〶〲〉〖〵〦〹〷〳〈〞』』〡〓〺〟〡〭、〧〺〺〱〟」「〠〡「〠〬〰〙〹〥〙〓〶〫〳〣〢〳〇〫』々〡〚「〮〘〭〹〶〸〮【〔〚〆〆〼〷〖〒〤〲〕〳〴〾〇〔〹「〦〔〹々〘〲〔〃〡〪〚〪〗〉〓〫〦 」〟〳〛〉〹〺〭〲〆〙〽「〱〘〿〡〭〦】、〠〰〢〥《〶《』〶〃〼〄〪〥〙【。』 〸〳〈〇〡〩〮〃〹〘〧〿〱々〿〭》〶】〥〜、〬〖〠〢。〾〫〔〩〥〫〓」〲〢〛〶〚〡〈」〡〦〼〰〔〾〨〔〄〹〬〛〃〇〸。〽〠〵〙〠【〶〉〇〗〔〒〒〇〉〧《〗〮〟〡《〉〻〧〝〓〱〧〜〘〦【〸〘〩、〵〡〈〴〭『〉〕〴〯〰〘〳。〴〃〙〨〄〈〿〒〕〯」〼〳〤〱『〓〚〛〳〣〳〺〒、〃〚〲〲』〳〃〷〵〹〷〾〞〞〹〣〢〨〵》〽〮〒〹〻〨〜」〇〗〨〙〒〃〆〫〹〉〻。〄〔〧〝〒〷〛〲〧〪〺〚〼〳〒〙〫〢「〲】〾〬〸〷〿〉 〱〛〙〰〜〧》〳〉】】〮〈〗〢〧〟〠〣 〭〵〰「〼〽〭〫〘〴〲〺〾〘「〮〯〩〛〤〣〥〛】〱。〬〴〞〰〣〻〵〹〤〇〴〮〦》『〨〛『〡〞〥〄〠〸〽、』〣〬〢〠〯〰〄〇〆》〇〵『〹〛、〃〟〙〡〷〿〩〥〶〲、〓〧〲〪〚〕〞〢〗〖〝〰〵〪〴〿』〱〮〳〫】《〹〟〻〝〓〦〣〞〤〷〠 〃〈〛「〱〿〆〟〟〉〤〿〈〦〥〻《〻〼〇〢〰〢〒【〞〆「〢〻〧〇。〭々【〪『〪〓】〹〃〄〹〕〝〒〚》〔。〕〶〺《』〦〗〳〰〶〨〔々〖《〰〷〛〩〨』〤〻』《。〵〱〼〵〛〝〧〼〡〶〧〾〯〷〞 〧〛。〦〛〪〕〶〱〆〤〻〹〱〰〖〨〥〚々。〾〽〦〸】〛〇〫》〃々々〲《『『〱〘〲〕〦〇〱〈〞」】〞〨〖〚〽〧〥〬〰〬〥〇〡〼〴〲〠〭〖〵〯。〙〪〖〯〄〾〮〗『〉〴〩 〃〚〲〠〨〟』〖〜〥〛〉〲〃〃〮〳〡〳〩 〄々〞〨〛〪「〼〓〭 々〵〘〄〝〭〖〰〾〬〆〸。〻〓〞『〥〗〪〚〇〞〭〤〉〼〬〕【〤】〥〡〛〖〕〆〧〝〧〺《〭〈〸〪〆〺〸〝〭〇、〆〯〴〸〤、〾〒〉〰〛〷〽〶〿〰〫〜〔〪〱〇』』〰〨〞〓〽〻〻〙〪〠〨〗〓〣〨〾。〜〃〘〚〇〟〖〗【〥。〡「〾『〙〢〦〹〩〟〠〘】〾〒〈〔『〣〲〉〉〻『〇〦〽〿〼〾〚〮〧。〷〰〲〧《〹」〕々〻〤〗〦』《〳〢、」〤 〰〞〠〨〾〪〯〮〳〒 〰〜〼〕〰〳〄》〤「〗〽〇〠〔〝〚〽〣》〷〙】〶〷〆』〇》〓〄〤〸【〡】〾『〯〶、〵〨》〼〗〨〶〉〄〭〓〲〞〝〞〡〻〷〻〣〰〈〽〮》〲《。〸〶〿〣〞。】〡」〖〩〔〜〘》〤〦」〓『〨 〹〞『〛〡〧〬〃〷】〔〫〆〤〻〲〆〯〞〿〧〔『 。〓〳〝〢〿〮〯〵〮〨》〴〒」〒〷〻〶〡〽〤〭〽〰》〾〹。〳〔〹》〴〕〫」〹〜〻〦〳〕〺〘〴』〈〽〲〃〔〙。【」〇〨》〨〴〿〄〻」〉》「〚〺〿〹〤』〄〸】〴〩々【「〫〒】〄〛 【〰〯〶〰〉【〮」〦「〣》〴〙〿〽〄〔〈〓〻〠」〚〯 〷〄〆〳《〸 〴〕〩〸〾〡〼〻〆〬〶〞〓〤〩〿〪〻、〠「〲〓〠〦〛〢〓〇〸〡〬〱】〞〫〽〖〉、〻〿〈〸〓〹〯〰〸〰〘〫 〬〬〽〦〣〾々〥《〰〗〩〰〞】〪〆〷〳〚《〯〱〓〣〭〗。〬「〢〸〮〤〓〖〾〣』〘〳〕【〼〤〔」〵〰〪〡〲。〤〃』〧〙【。〝「〶〻〝〖〢〡〿〓〖〺〝〈々】〈、『〼〣》〔〪《〢〣、〛〕〙〞〭〿〧〵」〴〾〯〫「〨〕〨〄〷』〵《〶〼〘〗】々〖 〳〶「《〝〰々〢〙〈〣〶〟〓〱〬〇〷〦〿』【〕〪〶〺〽〄〡〷〽〲》〟〃」〵〤〞〤〠〜〵〽》〉〡〦〖】〉〓〥〤〞 〺《〖〗 、」〯〳「〾【〩〮。〝〮〙】〦〴『」〘〕〉〚〯〳〇。〾。〇〔』』〚》〃、〠【〝〮」〟《〆〮〇」〥。〟〦〿〠〟〰〺〳々〯】〨〸〼〳〭〶〷〮〨〳〘〤〦。〠『〸〖「〰〝〡〻〻、〇」〇〚』〧」》〮〲〫】〱〼〻〲〷〓〉〵〩〢〣〻〚〞〧〰〽〕〭〧々〠〹〃〟〄〰〚〽〣〚〥〺〛〟〄〮〟〴『〾〒《〺〡 
〒〜〈〶〔〫〲〃〟」〿〘〥〥〥〓『〝。〧〾〓〶〺〆〷〩〣〫〜〿〿〰「〕〒〓〯〣〘〗【【〪〾〛〕〽〫〹【〿〧〛〵〲〛〒〇〉〧〺」〺〺〡『〳、〪〾〒〈〮〜〞〙〱【《〣〬〈」〣〵〹〥〵〞〻〆〭〵〟〒〲〧〓〖〣〓々〰〞〹〇〮】〪〫〶「〦〽〓〻〓】〽〭「〣〔〹〯〨〖〩〵〦〳〯〯〧。〗』〾『〩〗〴」〼〗〨〵〥〴、。〒〣〧【《〓〜〓〠〢〓】〷〺〼〕〡〆、〦〿〥〾〚】〕〦〖〙 〭〬〙〇〳〄〃〄〻〧〔〚〰〲〟〷『〫 】〲〲〸〳《〢〵〰〟〪〉〜〨〇〶〻〻〩〄』〒〴〨〈』〗〿〚『〝 〹々〳〼〲〗〙「〵〲〢〔〫〵〜 〘〶【〬『〱〗、〧『〛〇〛〒〈 、〦】〙〇〖〤〩〜〉」〉〿〬〧【〶〦〃〘〈〖〄〶〦〚〜】〛〽〡〸〰々〈「〾〼〒〥〞〸」〮〸〒〗〙々『〇〄〈〃〜〺〯〉〉〾〹〺〚〞〽〦〄〢〽〄〞〻 〼〄〘〙】〚〼〫〴〚〫〬〖〭〔。〰〹〶〺〕〨〇〛 」。〇〿〲「。〆〗、《〫〬〨〻〝】〓〥〾〴】〹〈〞〺〜〰〜〬〴〱〜〖〾〣〭〥 〯〩〶〈》〸〝〼》〶〆〆〽〼「〗〓『〕〃】〡〠〹〺〈【〸〝〤〮〸〭〩〼〈〃〃〉】〳〿〃〬《 〩〈〒〢〠〆》〇〭〬〓〖〝】〧〶〞〈〶〘】」〽〝《〡 〈〟〶〯〹〦〨〷〩〧〞《〵〬〰々〞〧〓〥》」》〤〥〧〧〓〛。〦〄〫】〪〔〟〟〷〧〷〟〺〪〩〷〡〘〞「〔〽〯〔〬〈、〴〨》〥〒々〼〒"; - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -185,7 +185,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void testYetAnotherCuriousString() throws Exception { Random random = random(); final String s = "〦〧〷《〓〄〽〣》〉々〾〈〢』『〛【〽〕〗〝〓〭〷〷〉〨〸〇〾〨〺〗〇〉〲〪〔〃〫〾〫〻〞〪〵〣【〩〱〭〨〸〃々〹〫〻〥〖〘〲〺〓》〻〷〽〺〯〫』〩〒 〇〔】〳 〵〮〇〡「》〭〆〒〜〱〒〮〺〙〼」〤〤〒〓〶〫〟〳〃〺〫〺〺〤〩〲〬 〱〜〝〤〘〻〚〻〹〒〃」〉〔「〺〆々〗〲〔〞〲〴〡〃〿〫」〪〤」「〿〚』〕〆』〭『〥〕〷〰〝〨〺〧【『〘〧〪』〫〝〧〪〨〺〣〗〺〮〽 〪〢】「〼〮〨〝〹〝〹〩〳〞〮【」〰、〳〤〩〄〶〞〠〗〗〙〽々 〟〴〭、《〃〝〈〒〸〷〓〉〉〳」〘」》〮〠〃〓〻〶〟〛〞〮 〇〨〭〹』〨〵〪〡〔〃〤〔〇〲〨〳〖〧〸 〴】〯〬」〛〨〖〟》〺〨〫〲〄〕」〵〦〢〴〰〨〺〃〓【》、〨〯〥〪〪〭〺〉〟〙〚〰〦〉〥々〇】〼〗〩》。〩〓〤〄〛〇〨〞〣〦〿々》〩『〕〡 〧〕〫〨〹。〺〿《〪〭〫〴〟〥〘〞〜〩。〮〄《〹〧〖〿》〰〵〉〯。〨〢〨〗〪〫〸〦〴〒〧〮」〱〕〞〓〲〭〈〩『〹〣〞〵〳〵》〭〷「〇〓〫〲〪『『》〧〇〚〴〤〗〯〰〜〉〒〚〔〠〽、〾〻〷〶》〆〮〉』〦〈〣〄、〟〇〜〱〮〚〕》〕〟〸〜〃〪〲〵〮〫〿〙〣〈 〳〾〟〠〳〙。〮〰〴〈』「〿《〄〛〩〪》「〓〇〶〩〇、〉〦〥〢》〴〷》〦』〉〟〲〚〹〴〲》〣〵〧〡〾〦〡〣「〆々 〔〄〓〡〬〹〣〰。〵〭〛〲〧〜〽〛〺』〛〵〒〽〻〆〚〚〟〵〲〺〠〼〻〄。〯〉〃』〕〫〥〦〕〔〢々〷々〥〥〖』〶〿〘〗」〖『〢〯〫〇〣〒〖〬〜〝〩〉〾〮〈〩、〘〰〦〧〓〬〸〓〺〼〟〰々〩〩〹〣」〓〸〄『〆〰〹》〵〉】】〼』』〸〣〦〾〰〗〴〥〴〤〃〿〡〳」〢〩〡〮〻〘〤〝〗〃〪〘〈〴〪〯「〭〓々〃〯〄〼〚〧々〢〃〈〔。】〆〣〰〜〪〮〣〿〕〮〾〱〇〈〟〭】〔〥〡〝〙〛〔「 〼〶〸々〹〯『〞〒〇〟〃〳〓〩〝〿《〵】〙〛〪 〭〼〈。〷》〨〰〵」〤〄〾〄「〈『〥〽〕〙【〤》〳〝〔〠〤〲〘〱〈『〴〫〚「」〛〸〹】〱〒〆」。〯〃】〼〮〒〄》〾〷〥〟〞〲〜〲〟〫〕〆〇〸〸〹〾〰【》〨〤〭「〇】〳〯〤、〙〳〺『〲〽〬〥〠。〹〃」〹〪〭〒 〇〶〧〟〻【」】〙〤〡〱〖》〇々〽〬〥〨〠〘〺〳【〫〄〜〹〄〚〯〈〸〻〓〥〤〻〮〃〗々〪〺〿〬〙〈『〭〩〟〽〬〝〄〦〇〥【〨〫〦〗〯〞〜〈〒〽〖〧〼〈〭〓〶〃〰〙「〧〉〹〢〕〼〒〸〼〣〡〔〩〯〼〚〲〖〪〯〒〮】〥〙〯〆〡〲〾〭〫〕〘、〖〮】〟〺〝〨〤〯〓〛》〳〢「〒〥『〿〔〸。〫〬〡〓〝「々」。〘〣〲〴〆〲】〽〮〮〲〓〞。〲〘〉【〲〭〰〨〩〱「〆〩。〦〉〇〄〺〱」〮〄〯。《〭〹〳〸〜〮〧〷〜〹〥〾〨〬〦〮』〖】〖〥〞〕〧〹〽、〺〜〯〒《々〠〠〴〝〤〇〷『〳〞〠〤〣。】〝。〛〉《〩、〦〻々〄〙〞〽〒〧】〉〺〦〔〄〯〙 〫〴〈〽〴《〰〱〗〢〓〔〗〖〖〪〷〠。〨〠〙〴〷〿〻〴〪〠 》〉「〛」〟〗「「〚〤「〫〨〣〉〶〥〢〈〯〄〈】〃〵〪〼〸「〾〥〒〲〮】〙》〡〯〓〵〡《〬〾〛】〄〡〦〪、〆、〵〒〹〰〴〜〬〶〭〕〟〠〰〜〶〵〨〾《 〻〵〔〘〟〾「〡〃〼。〤〺〭〨。〪》〄〇〄〔〖〺〪』〆〸『〰〭〆〗〪〪 〇〜〡〨〞〧〇〛〥」〼〇〼『〸『〵〼〇 〽〹〨〪〗〳〽』〵〽〸〷〄〿 〩〢〺〳〗〞〹〒〼〕。〇〷〔〯〜〘〾」」。〥〯〤〖〛〙〹〘〯〡〱〮》〰〾〚〚〣〆〰〹〾〝〉〲〠〗】〤〿〶〱〾〇〽〤〰〆〭〝】〤〰〼〪〬〰〸〓、〃〵〄〉〤〲〱〨〵〴〮〹〬〧〜〭〶〒〯〺〬〒〭〲〡〔〚〹〇〫【〯〥〪〻々々〨〧〳〛〯〿 〈〽〥〘〖〣〿〫〲〶〚 〓〙〫〴〆〙〶〽〉、〔〪〫】〤〟〓〃〝、〧〡〸〸。〸【〹 〧〡】〡「〗〴〴〳〶『〱〖「〺〠〼〾〱〃〖〤「〧〭〟〇〧〙〕〩〭〻〤〩〪〳〪〟々。〷〥〗。〳〸〆〢「〆〿〻〚〳〚〸〟〘〡〘〇〶〖〡〇〾〥〖〝〝〹』〦。〖「》〥〞〳〛〕〖〥〻〙〾〔〬〈〇〓」〭〹〷〪〖《〫〾〒〙〺〻〨〼〇〝〾〣〴〚〩〴〕〢〦〩』〭〧〵〾〟〣〬〥〟〣〜」、《〲〧〪〸〸【〙〹、〤〽〰。〦〩〮〹】〸〆〹〗〓〶〇〤〳】〾〨〞〩〱〡〇〱〮。〶》〝〱〗〃〘〣〬〲〽〈〒〻〃〥〪〭〤〗〰『〵〹〙〇〵【〕『〤〄〕〥〵〸〮。〳〮、〤〣〱〧』〯〜』〉】『〷 〰〵〓〙〃〟〆〼〞「〫〄」』 〨〹〸《〷〔〫《〝〞〆〬〩〟」】〾〷〄》 〵 〫〵》〻〨〰 〟〈〰〽「」〸〣〪〮〛〞〜〦〱〚』〕〱〪〲〩〥「〚〓〺〣〶〨』〕〇〮〹〟〞〕〶〡〭〠〕〦〦〢〽〤〈〈〻〣〧〱〿〵】〖〞〖【〢〩〼【 〻〘〃〤〫。〠、〗〢〷」】〼〘〖。〤〘〄〢〴、〘〆〯〱〜〃「〦『〯〰〘〫〹〶〷〿、】々〙〛〜「〹々〮〿「〸〉』〯〱〄〓〥〣〩〥』〖〤〛【〭〿〺「「〳〛〧〉『〈〆〒〠》〳〈〳〩〃〮〚〼」〲〮〩〮〮〢〸〿》〈〉〗〾〇〕〩〸〖〾〠》〃〞〄〣〭〡〕〣〚〆〤〄。〸〞。《〼〄〤〸」〿》〤「〵〥【〔〕々〙〸〛〛『〶〾。〷〫〼〽〤〨〓〭〻〈〶〿〾〨』〤【〾」〇〤〒〠〺〜〸〼〪〢〷〔》〣〤〬〣〱〝〇〺〢〠〤〹〡「〪〲〿〬〘〡〯、】〖、〈〶〛〢〕々〽〼〼〚〿〘】〢〰〡〿〗《〉〙《《『〶【、、】〡〓〦〞〵〤〧』〝〕 〄〃〸〈〤〪〻〭〉〘〷〉〕〨〻〢〢〡〸〔〮〧〹「〦〘〉〾〉 〺〽〷「〺〖〺〝》〃 〇〪〜〶〺〣〇〭 〾」〣〼〞〷々〽〤〶々》〻〈〽〒〕『〬」〈〟〕〷〼〲〄〚〜〴。〮》々〧〻〔〕〈「〾『。〴〷〯〢〿〦〈〸〩〻〃〻〚〞〤〈。〧〇〾〺〢〓〵〸〛〔〡〷【〜〺 〕〶〦〣〻〟」》】〺〚〷〺〹〙〳〺〬〓〢』〘〕「〸、〙〾》〖》々〬〄〇『。〵【〩『〺〆〮〮〙〵〫《〃〽、〓〠〨〚〕〈『〦【〗 〄〴〫〡〮〱〔〆〗〟〵】〻々》〲【〬〢〚〛』〱〰〫 〇〤〴〮〾。。〮】〇〲〻〙〰〥〚」〟〜〄〟。〤々〞》〧〉〳【〿〺〆〈〖》『〤〄』〾〵〲〸。〈『〕〺〘〣〶〬『〪〆〳〽《。〒「〽〨〸〜〚〘〪〤々〦〆〺『〣〆〽〇〿「〥〵〒〲〟〜〳〭〼〆〡〮〆「〆〥〺》〱『〺〔〃〙〻〥々《々〙〼〪〼〵〙』〥「 〵〯〓〩。〰〕《〟〦〝、〦〦〤〗〴〩〹〶〠〰〡〇〤〹〓」〣〆〜〴〘〔〃「〤〈〩 〠【〃〙〢々〉〝〬〙〭【〮〗〙〤〿〖〓〫〻』〞〤〼〳〹〄〵〾〔〛〮。〒〉〤〣〭〰〨》〭〲〗〃〇〆〡〜〱〲〮〫〄〬〄〉〯〈〮〩【 〮〦《〪〲〣〡〶〬『〲〵〇〶〰〒〭〽 〰〄〻〄『〬〩〠』〕〫〤 〼〶〳〮、〓〸〲〓〜〳〺〈〫〺〒 〨〡〡【〷〆〇』〝〩〨〗〕〪】〪 〛〛〺〙〷〦〠」〱〞〼〸」、〢 
〺」』〲〆〃〟〱〟〝「《〸〳〒〖〨】〥〖〈〧〼。〫『〙〧〡『】〔々、〼〝 〕〙〇〘〲〔〝〺〘〄〓〒〼〈〛『〺々〩〱。』〬【〱「〳〜〼〬〴』《〗〔〡〰〪〤〥〲《』〥〉〪【〶〤【〻〡〒〯〜【〽〪〉〠〾〙〰〚〵〦〦〴々》〙〠」》〠〱〓【〶〦々〻【〽〶〼〺〷。〶【〘》〻〗〳〣、」】〳〓〞〆〆〾』「〈〙〕〱〢〳〨〰〡〸。〣〪〤「〱「『〙〽〇々【〜〖〮〚〟」。〜〰〉〔。〣〽〇〖〬〆〥〖〧〨〱〡〸〪〣々』〄「『〞〶 〴〰】〃〱〱「〶〝】〞〭〚〴〶〻〟〧〡〳〬〧、〣】〕〼「〠〃〷〣〩 〭〄〩〝〦』〟〇〦〟〕。〩』「〵〩》〿〻『〙〼〲〰》〨〉〆〓、〺〹〸。〞〧〗〘〳〓〞〹〕〡〼〔〖〴〄】〚〻〯〴〣〮〦〧〣〵〼〚〾〫〼〣〔〚〽『〵〒【【〝〹〮》『〨〜〠〸〠〵〨〙【〧〸〈》〱〗【〓〤】〰】】、〩〽〈〸〔「〵〻〙〓〰〇〚〞〗〙〢々〭〜〈 。〧〿〧〨〵〾〝〬【』〫〦〸〬〈、〒〢〉〞〵〒〼〝》〻〫〧〤〶〹〼〩〛〫〣】〿」〴》〺〬〤〕〲〕〙〔〪〰〿〬〒〔〞〆〻〴〘〩〨〤》〩〪〭〳〇〣〚〟〚〕〓〴〱 〵〃〠〭〠〚〗〃〃〸〰〢〡〿〭『〗〉、〲〕〧「〛〛〓〜〰〮』〱〨〬〨〽〸〽〶〣〯〫、〯々、〴 」〕〥』〻】〖〴」〨。〖〤『〜〰〩〣〣〸、〫〝、〯〹〷〳〚〄〷【〃」〼「〤】 〢〖 〣〙〺〽〽〱〤〔〓々〣〭〽〘〦〻〪〿〞〝〱、《〆」〸〷〛〓〕〹〜〪〹〶、〵〦〛〲〒〹〪〦〃〥「〸〪〙〧〱〠〰〝〆〠〯《〼 〛〚〔〟〽〗」、〲〥〞〴〃、「『〖〼〞〪〼〇』〿〶々〙〻》〥、〵〛〞〠〫〟  〹〾〵』〤〿〣〪〗〃〖〬〩〴〗々〓〝〥〥〜〲〯〗〤》〛〮》々〚〘〫「〙〉【〆〽〨〹〮〧〷「〴〝〬〷〗『〔〷〮〟〲〬〸〸〟〹〆〖〨。〣〄』〴〚】〘〲〚〚〦〈〛〗〞〉〞〯〆〵〸 〗〕、》【〸〮〵〉〥〨〕〟〭【〾〇〵〬〾」〱〹〚〟〛〡」〩〃〄〬〱〭〚〱〆〛》〣』〝〡〦〣〫〒〗〛〿〤〇〼〠〲〢〬〿〓〠」〚〇〛〈〴《〦〱〤〹〝〱〶〟〙〴〶〣〝〮 〜〲〱〿〳〪〄〝〃〰〙〖〼〰、〬〰』「〭〻〮 〩』〱【〆〻〺〸〾〤〗〸〥〽〼】〤〣〖「《〡〙。〸、」』〠》〴〈、〴〢〣〲〟〳〸〒〠〣〵〢〿》』〿『〾〔〢〶〦〟〠《〹「〷〽〷〆〇〉〲〿〵〙』〫〠々々〘』《〽〒〦〽〓〳、〮〻〫〞〲〰】【〗」々〥』【〫〆〫〳〾〣〖〺〷〙〘〄〈〼、〧〻〭〮〳』〘〾〇〸〉〽〗『〙〽〻〟〇〘〽〖〴〄〓〞「〦〪〚〾〨。〕〻〰〟〉〢「〉〿〯〔〹〃〛〛〝〔《〵「「〴「〗〸〖〞〦【々〣〲〤〾〿〽〲〥〢〥』〳〳〼『】〆〼》〩》」。〛〲〡〳「〢〥》〘〠〃〳〃〒 〧〓〡〤〄〲」〦〶〷〟〛〠〱〽〫〫〸〇〔、〪〛〠 」〢〳〸『〸〚〹〈〘〉〫〇〲〲〈〕〙〱】〯々【〬〖〿〒】〔〭〣〚〄〈』〧〗〹】〇〬〸〾 〭〺』〯〫〻『〘〻〱 〴〆〘「〠〈〫〡》〤〕】〜〙〵〒〙。〦〮〞〪』〴〓〪〾〝〹〴〼《〦〞〖〆《〥〸〻〈〽〪〤【〖〶〞〤〃〰〨〱』〨〼〱〠〣」〝〹〝〕〼〔〃、〮】〤【〼〤〼〥〪〲〓〦〘〟〞〭〜〸】〚〸〵〞〙〧〈〽〹〄『『〙〓〸〯。〜〺。「〖 〶々〉〈〮《〢〭〶】〘〜〺〸〒〥〢〾〈』〱〃〤〳〖〉〼〫〛〚〽〫〳〰〫〥〜〜〺〷〲《〢『〛〭〈〧〳〣〜〝〧、〥〾〻〳〺〕〥〥〼》」〺〮〒〣〥〲〟〠〫」〾〱〼「〄〆「〓〽〹〵〈〙〛〵〰〩〟〫〈〔々〒〟々〉、〷〚〶〆〘〛。 【「〸〸〖〫〕〰〱〺〟〫〿〹〩〇。〾〒〚〲〾〛〳〨〦〙〒》。〺〧〡〞〒〚〩〪〶〘〣〨〶〩〛〺〙〪〄〼〮〰〒〡〼〓〙〒〇〽『〃 』〇》〽〃《〒〠「〚〨〗〶〴〪〮〵〘〨々〓〗〚〠』〗〮〳〺〲〙〒「〴〼〻〤〉〯〨〧〈】〾〟〝〒〃〘〧『〶〿、〤〝】〜〴〰〷〽〮〱、〩〽〺〯〫〜【〴〈〳〖〬】〦〘〗〜〝〄〚〚〤〨〲』【〞「〰〔》〷〥〈〡〳〢〾〮《〭〫〡〴〹〻〚「〰〻〉〣〢〤〤〝〩〧〙《〓】〺〺〓〿〹〈〚〱〬〘《〽〈〕》〣〓〒〴〆〜〭〖〛〝〷〧〴〮》〳〘〸〴〿〥〙〒〔「》〓〕〦〯〾〯〝、〮、〯〆〛』〞〝〵〥〬〚〡〰〔〵】。〽〥〿』〩〇〝〄〴〪〭〸〫〡〣〧〆〚〫〴〙〦〽〉〸〼。〱〨〛〠。〮』〝》〻〹〈〄《〻〱〥〞〽〾〄〝〢〿。〴〆〲『〰〢〖〲〼〯〃〠【〲〵〛〣〝〕〬〺〰〪〻『〨】〖〥〵〹〯 〒「〠〮〈〃〹〽〬』〹〷〫〕〧〟〒〉〉、〈「〟》〼〪〰〗〘『〞〉〹〚〤〩〦〗〖〮〰〇〠〫」〔》〮、〆〡〛〻〙「〵。〯〹〘「〵〫〼。】〃〢〺〴〛〪〬 〞〟〓」〭】、〸〘〻〈〤》〓〩〽〆〵〨〈「〦〠々〨〒〢〛〝〿〗〥〱〕〩〖〣〄〚〿〆〗〢〉々《〚〩〶》〥 【『〪〯〾〸〪〲〞〠 〡〓〻〷〢〕』「〹〯〛〫〲〗〗〚」〵【〪〢〥〫〆》〦〥〱〯【【〉〧〺〻〉〬〳〒〳〾〲〲〇〇。〪〙〧〿〆【」〇〪〸〽〦〚〽〿 〠〺〥〦々〬〄〟〪〭、】〴〾〸〛。 『、《〫〺〯〛〩》〓〴〪》」々〧【〦〇〮〬〲〗〔〦〴〣〼〨〖〩〬〼々〛〇」〴〦〉〤〺〪《 〒〧々〤〧〣〘【〵〛〢〵《〛〘〵〓〶〳〤〺〨〣〭〤〪〮〺〷《〗〵〞〻〠〭〃】〄〒〯々〶〉〞々〽〤〇〦『〦〽〩〬〠』〷〄〩〙〖〝『〘『々〔【〿〰〶〪〱〉〘》〃〙〧〦〇「》《〹〰〯】〹〄〈〪〜〵、〮〣〇〯〲〛〬〕】々〸〹〩〟〳〆〥〯〬〠〭〯『〙〆〾『「〈〬〹〕〾、〸」〷〥〆〺〾〖。〆〒〮〻〡。〉々〕』『〨〼 〢〓『〢」々『 〘、〖〤〜【々〤〷〵〳〤〽」〟〥〴』、〒〥〆〙〬〧〔〡〄》〷。〣〉〪〙〚〾〣〵〰〮〔〇〝〫〫〩。〪〷〩。〇〿】〲〦〳〕《〄〴〦〽〔、〱〧〟。〻〺〔〝【〲〔〦〙〖》〠〫】〵〙〰。〖〸〼〣〗〲々〤〢〷〝〰】〳〳〯〟〓〬〺〤〿〲〩〞〡〧〲〧〭〽〪〰〥〧〴〈〈〢〕〯〔〨々〭〸〡〖〓〤〒〝〻〻』〣々〸【〸〸〷〓〇〦〻〤』〉〾〛「〢〢《】〜々〛〇〠〒〹〖〽〮〚〫〜〼〄〓 〹〽《〽》〮【〺〦〠〨〰〸〘〲』」〹〳〤〽〴〴〰〳〷〟】〼〽〓〇、〡〚〶 〥〄〉〴〵、〷〳〥〬〳〓〩〯〜〪〯〬々〢〾〆〨〥」』〪〄〨〽〗〭〯〼〒〡「々〩』 〉〔〓《〉〺〫〖〽〱〳〡〪〯』〼〉〝〟〹〯〇〠〥〨〖「〢「〥〲〘『〹〥〶〜〥『〃。〲〗〢〩〮〕〨〸』〪〯〲】〠〻〟〶〣〸〵〩〔〾〞〳〾〇〵〥〟〭〳〡〆〾〤〶】〈〓〄〮〢〒〩《〔〭〄》『〰〧〡〖〵〥〵〒〭〳〵〝〜〱々〞〰〴〦〱〿〾〴〪〥〧〚〚〒〚〘〿〛〾〫〚〕〷〔〗〢〻〠』〘〾〖〿〦〥〮〆〼〞〴〹〸〻〵〞〄々〷〔《】〛〒〻〓〴〮〛〺》〫〬々〦〦〬」〯〞〼〚〘〰〿〝〾〘〠〵〴〃〞、〹〢〗〹〰〤「〔』〇〒〭〫 〞〉〿〜〳〫〩〿〧〵〟〾〤々〩〝「《〬〃〇〬】〔〇〆〷〭〬〵〾〚〺〬〧〻『」〈」〻〹〞、】。〉〯〫〺〒〙」〱〛〻「』〱〺〠〄【〿〦〰〸『〬〴〓〨〢《〣〓〜〒〡『〼〔『〵〕〝〗〳《〲〳〼〝「〽〬〱〺〠〱〽〘〗〹〨〆〕〠々〓〤】〺〉〴〰〮」〰〿〹〳『〠〔〇〧〭〼〪〭〯〖〶〬〃〱〔〙》〺〜〵々】〡〧〲』〕〛〳〥〩〱〮《〦〫】〖〈》〞〻〤〢〦〪〬〲〗〢〷  〳〰〓〕〜〥」〬〗〒〜〉〩〆〬々〿〪『〣〘〡〘〯〳【〄〠〸〼〈〰『。〟〲〭〡〷〥〯〴「。〤〓〪〆〦〆〒〽〫〰〚〡〨【〯〹「〧」〓〖〘〳」〕〲〚〣〕〆〃〱〞〷〺〻〃』〩〫〦〱〴〟、〰〘〞《」〛〤〿〔 〤〱》〗〷〡〡〗〞〦〿。〤〳】。〟〻〉「〻〙〖〿〄〶」〾〫〽〸〕〢〰〞〞〒〜〻〠〭〫 〞〴〰〶〺《〣々〩〲〡〴》、〩〝〞【〼〓〱〻〩〒〖〿〮〱〧〟〒〶、〿〈「〻〴』』〇〉〝〛〢〜〼〘〰〇〢〃〲〟〨〟〣〟〰〉〮〘〽〧。〓〳〩〺〳〓〘〗〖〈〜〴〟〽〣〣〾〽〩〲〜〇〰〩〕〧〚〄〴〴〴〨〠〦】、〣〺〖》〯〷』〒〤』〙〗〬。〧〆〜 〧〩〯〞〜〬〡〆、〞〔。〾〩〈〛〼」〾〮〤〾〟』〉〔〞〾〛〲《〈〫〝〽〳〞〔【〿〽〩。〈〨「〯《々〇、〯〜〾〝〯〼〆〟〉〝〮〙〪〚〮〱〹〯〜〟〠、〄〹〧〳〱〯〖〯】〩〴【〫〇「「〿〩〷〾〴〯〦〼〦〟〖〤〪〥〰〔〻〪〄〖〳〵〟〕〰〬〶〚『〘〻〇〽〪「〉】〮「〣〿〇〭〕〓〵〽〆〳 〨〩〕〬〵〸〻〲【『〥〖〚〢〰』〠。、〮〣〆〴『《〲〓〷《〱〰々〫〶〢〯〗〚〙〶〫〖〃〻》〰『〱〘〫〛〄〉「〠〱〚〖〕》〤《 〵〶〢〯〗〳〛〚〽〗〟〛〪〾〶〞〶々〆〯〇〝〕〨〨〣〫〄〵〞〛〬〣《〦〦〒〉〙〫》〞〨〜『〝〻〒〟〓〜》〡〡〫〻』〆〒 〔。〓》《〨〙〿〙〔〘〮〦「〚〻、「〵〠〉〬。〭】〱〸「〶〈〞〈〪〟〻〝〲〮〆〼〯『〱〡〙〮〕〒〣」〳〥〙〡〡『〇〠〡〭〷〜々』〣」〼々、〗〡『〽〻〽〳〉〄〵〬〽〯〥〾〙〉〿〮〴〷〥〡〰〹〰《〺【〒」〙〾〽。〴〘〕〝 〳」〡〇〩〥〾〆〨〉〫〠〙〤〒【〸々〣〓〰」〈〪〵〠〚】〈〆〵〗〜〦〣〃〼〔〉》〆〞〚〆〄〫〺〽〪々〩〴〵〹〿〔〥〜〩〪〤〗。「〽〨〟、〄〽】〩〙〝〺〶〸〟〯《《〥〣〻『〟〽〮〄》〙〕『」〾〼〷』々〥〒【、〗〔〯】〮 
〹〩】〡〇〟〫〢〨〡〭」〄〼〙〪〻〪々〙。〫〧〪〞〾〄』〟〶〇〞〜〥〘。〝〨〸】〕〔〨〕〾〃〾〒」〈〒〓〼〗〖〕〱〙〘〓〝〾〔【〵〿〖〸〷〵〩【〞々〼〢〧〻〥〰〦〤》〰〛〡。〖〝〙〒〽〜〕〘「。〵〇〒〾〼〽〈〣〇〒〙〢〸、〞〲》〪〰〴 〽〭〷〸〫〆〞〾〨〆〛〔〤〜」〈〨〃〈〴〽〲」。【〞〒〉。〱〕〨〽。 〷』〦》〵〩〪〡〕〞〹〃〧〃〝〢〴。〃〛〭〻〣〸〖〞〻【〛》〜〳〜〟〘〄」〸〬〶〥》〨〭〡〦〇〇《〱】〸〼〺〬〛〓〔」〰〈〧、【〕」〳〼〗〯〉〒〖」〧〩》〴」〺。〰〷』〩〚〭〞〰〶〚〲〙〥〢。〽〵〱」】〓〘〦。〭《〥〙、〱〹〦】〕》〲、〘〓〙〷、〪〕〉〭、〇〜々〖〨〞」〠〕〲〨〕〔〻〿〙〘〙』〼〘〡〢〧〚〢〷〸〰〟〰〗」〪〛【〪〺〒〱〈〦〽、『〥 〙〪〕〝〄〛〣〴〯〆〒〰〜〪〆〠〞〾〃〭〬〡〉】〄〃〥〥〒〶〕〢〵〣〢〨〘〩〹〖〧〒〺〫〕〡〆〭〘〿〠〹〲〔〫》〪〰〇「〯〫〈〾〱〄、〮『》〹〿〿〱〦】〳〰」。【〘〆〞〚〱》〫〷〸〠〲〚〶〷〘〩〯〛〄々 』〪〭〬〖〪〦々〼》〇〤。〉〯〟〮〢〤〬〜〪〬〺〿〹〖〔】〕〖〣 『〵〸》〧〻〺〜〧〯〄"; - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -193,7 +193,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void test4thCuriousString() throws Exception { final String s = "\u10b47\u10b58\u0020\u0078\u0077\u0020\u0020\u006c\u0065\u006c\u0066\u0071\u0077\u0071\u0062\u0020\u0079\u0078\u0069\u0020\u101da\u101d5\u101e6\u0020\u0074\u0020\u2c55\u2c18\u2c2d\u2c08\u2c30\u2c3d\u2c4f\u2c1c\u2c1b\u2c1c\u2c41\u0020\u003c\u002f\u0073\u0020\ue22a\u05d9\u05f8\u0168\u723b\ue326\ubf5f0\u0020\u0063\u006a\u0072\u0070\u0061\u006b\u0061\u0071\u0020\u0028\u005b\u003f\u0020\u003f\u003e\u0036\u0030\u0020\u0020\u0065\u0068\u006a\u006b\u0075\u0074\u0020\u0068\u0067\u0020\u0071\u0070\u0068\u007a\u0061\u006a\u0062\u0065\u0074\u0069\u0061\u0020\u006d\u0079\u0079\u0065\u0067\u0063\u0020\u3066\u3082\u308e\u3046\u3059\u0020\u2125\u2120\u212d\u0020\uffbe\uff5c\u0020\u0067\u004c\u0025\u0020\u0020\u2df6\u0020\u006b\u0020\u0066\u006a\u0070\u0061\u006e\u0064\u0020\u0067\u0072\u0073\u0020\u0070\u0064\u0063\u0020\u0625\u0278\u6722d\u2240\ufd27\u006a\u0020\u4df1\u4dee\u0020\u0072\u0065\u0063\u0076\u007a\u006f\u006f\u0020\ue467\u9d3a0\uf0973\u0218\u0638\u0020\u0019\u0050\u4216c\u03e6\u0330\u894c2\u0020\u0072\u006d\u0065\u0020\u006e\u0061\u0020\u0020\u006d\u0075\u0020\u0020\u0063\u006f\u0074\u007a\u0020\u0069\u006a\u0076\u0078\u0062\u0061\u0076\u0020\u1c26\u1c2c\u1c33\u0020\u0067\u0020\u0072\u0068\u0073\u006a\u006e\u0072\u0020\u0064\u003f\u0064\u0020\u0020\u0073\u0073\u0073\u0072\u0020\u0061\u0020\u0076\u0077\u0062\u0020\u007a\u0020\u0077\u0068\u006f\u0062\u0062\u006e\u006f\u0070\u0064\u0020\u0020\u0066\u0073\u0076\u0076\u0070\u0066\u006c\u006c\u0066\u0067\u0020\u006c\u007a\u0065\u0078\u006e\u0020\u006d\u0066\u0020\u005b\u0029\u005b\u0020\u0062\u0076\u0020\u1a12\u1a03\u1a0f\u0020\u0061\u0065\u0067\u006e\u0020\u0056\u2ab09\ufd8b\uf2dc\u0020\u006f\u0020\u003a\u0020\u0020\u0060\u9375\u0020\u0075\u0062\u0020\u006d\u006a\u0078\u0071\u0071\u0020\u0072\u0062\u0062\u0073\u0077\u0078\u0020\u0079\u0020\u0077\u006b\u0065\u006c\u006a\u0020\u470a9\u006d\u8021\ue122\u0020\u0071\u006c\u0020\u0026\u0023\u0036\u0039\u0039\u0020\u0020\u26883\u005d\u006d\ud5a0e\u5167\ue766\u5649\u0020\u1e0c\u1e34\u0020\u0020\u19ae\u19af\u19c3\u19aa\u19da\u0020\uaa68\uaa78\u0020\u0062\u006b\u0064\u006f\u0063\u0067\u0073\u0079\u006f\u0020\u0020\u2563\u2536\u2537\u2579\u253f\u2550\u254c\u251d\u2519\u2538\u0020\u0070\u0073\u0068\u0020\u002a\u0061\u002d\u0028\u005b\u0061\u003f\u0020\u0020\u31f9\u31fc\u31f7\u0020\u0029\u003f\u002b\u005d\u002e\u002a\u0020\u10156\u0020\u0070\u0076\u0077\u0069\u0020\u006e\u006d\u0073\u0077\u0062\u0064\u0063\u0020\u003c\u0020\u0020\u006a\u007a\u0020\u0076\u0020\u0020\u0072\u0069\u0076\u0020\u0020\u03f2\u03d0\u03e3\u0388\u0020\u1124\u11c2\u11e8\u1172\u1175\u0020\uace9\u90ac\ua5af6\u03ac\u0074\u0020\u0065\u006a\
u0070\u006d\u0077\u0073\u0020\ue018a\u0020\u0077\u0062\u0061\u0062\u007a\u0020\u2040\u204f\u0020\u0064\u0776\u6e2b\u0020\u006a\u007a\u006e\u0078\u006f\u0020\u030f\u0334\u0308\u0322\u0361\u0349\u032a\u0020\u006f\u006e\u0020\u0069\u007a\u0072\u0062\u0073\u0066\u0020\u0069\u0079\u0076\u007a\u0069\u0020\u006b\u0068\u0077\u0077\u0064\u0070\u0020\u3133\u3173\u3153\u318c\u0020\u007a\u006c\u006a\u0074\u0020\u0065\u0064\u006b\u0020\u002b\u002e\u003f\u005b\u002d\u0028\u0066\u0029\u0020\u0020\ua490\ua49e\u0020\u1d7cb\u1d59f\u1d714\u0020\u0070\u0075\u0061\u0020\u0068\u0020\u0063\u006e\u0020\u27b1\u271c\u2741\u2735\u2799\u275d\u276d\u271b\u2748\u0020\u55d4\uec30\u1057b4\u0382\u001b\u0047\u0020\uf1a9\u0a76\u002d\u0020\u005d\u005b\u0061\u005d\u002a\u002d\u002b\u0020\u2d05\u2d22\u2d03\u0020\u0073\u0064\u0068\u006b\u0020\u0067\u0079\u0020\u2239\u2271\u22fc\u2293\u22fd\u0020\u002c\u0062\u0031\u0016\uf665\uf0cc\u0020\u0064\u0068\u0074\u0072\u0020\u006b\u006c\u0071\u0061\u006d\u0020\u005b\u005b\u0020\u41dad\u721a\u0020\u39f2\u0020\u0020\u13f4\u13e4\u13a3\u13b8\u13a7\u13b3\u0020\u0049\u0004\u007b\u0020\u13420\u0020\u0020\u2543\u252f\u2566\u2568\u2555\u0020\u007a\u006e\u0067\u0075\u006f\u0077\u0064\u0077\u006f\u0020\u01d4\u0508\u028d\uf680\u6b84\u0029\u0786\u61f73\u0020\u0020\ud7ee\ud7fd\ud7c5\ud7f4\ud7e1\ud7d8\u0020\u8c6d\u182a\u004f\uf0fe\r\u8a64\u0020\u0064\u0077\u0068\u006f\u0072\u0061\u0020\u006b\u006a\u0020\u002b\u002e\u0028\u0063\u0029\u0020\u0071\u0018\u2a0a\ubfdee\u0020\u0020\u0020\u0020\u003b\u0020\u4dda\u0020\u2ac76\u0020\u0072\u0078\u0020\u0020\u0061\u0073\u0020\u0026\u0020\u0068\u0077\u0077\u0070\u0079\u006f\u0020\u25cde\u05b2\uf925\ub17e\u36ced\u002e\u0020\u2e285\ue886\ufd0c\u0025\u0079\ueecb\u0038\u0020\ud03c\u0039\n\uc6339\u0020\u0077\u0074\u0020\u0065\u0069\u0064\u0065\u0020\u0075\u006e\u007a\u006d\u0061\u0074\u0020\u0066\u0064\u007a\u0070\u0020\u13114\u1304d\u131c3\u0020\u006f\u0061\u0067\u0071\u0070\u0067\u0020\u0069\u0020\u1f007\u0020\u0070\u006f\u0020\u002e\u005d\u002a\u0020\u0062\u0075\u0077\u0020\u0020\u0021\u0038\u0020\u006f\u0072\u006f\u0078\u0020\u0070\u0020\u12a2\u0020\u25e1\u25e7\u25be\u25c9\u25c6\u25dd\u0020\u0062\u0062\u0065\u0069\u0020\ua6a7\ua6d4\ua6cd\u0020\u006e\u0063\u0076\u0069\u0020\u003f\u002b\u007c\u0065\u0020\u0075\u0062\u0076\u0065\u0073\u0071\u006d\u006f\u0073\u0020\u0071\u0020\u10282\u0020\u174f\u1742\u1758\u1750\u1757\u1752\u174d\u175f\u0020\u006f\u0020\u0020\u0068\u0077\u0020\u0020\u053a\u0036\u0286\u0037\u0014\u05f1\u0381\ub654\u0020\u006b\u006b\u007a\u0079\u0075\u0020\u0076\u0072\u006d\u006d\u006a\u0020\u0074\u0020\u0075\u0074\u0020\u0639\u0057\u0235\u0020\u006d\u0064\u0061\u006e\u0079\u0020\u003c\u2b7c6\u0020\u0063\u0061\u006d\u0068\u0020\u835f\u0572\u20b2\u0020\u0066\u0068\u006d\u0020\u0071\u0063\u0061\u0079\u0061\u0079\u0070\u0020\u0061\u0063\u006a\u0066\u0066\u0068\u0020\u0077\u0068\u0074\u0074\u006c\u0061\u0020\u0020\u0077\u0064\u0073\u0020\ue0068\u0020\u0019\u0048\u0034\u0020\u0064\u0068\u0077\u0062\u0020\u006e\u006c\u0079\u0061\u0062\u006f\u0074\u0020\u0074\u0065\u0077\u0020\u0063\u006f\u0065\u006a\u0020\u1b3f\u1b7a\u0020\u0020\u0020\u1f00b\u0020\u0020\u0061\u102c5c\ue1b9\u0020\u0071\u0069\u0067\u0066\u0020\u0016\u8e2f\u005f\u0067\ud6c2\u0020\u0073\u0071\u006f\u0020\u006e\u0078\u0066\u0063\u0066\u0064\u0069\u006e\u006e\u0020\u0024\u0078\u59d1\ueacd\u0020\u25367\u07ac\u5652\u0020\u2592\u2588\u0020\u007a\u0068\u006f\u006c\u0078\u006a\u0064\u0020\u0070\u0065\u006a\u0076\u006d\u0079\u0020\u0020\u0066\u0061\u0063\u006f\u0020\u006d\u0072\u006e\u0061\u0070\u0020\u00
62\u0075\u0075\u0020\uf2e2\u07d9\u0020\u1cd1\u1cee\u1cf3\u1cdc\u1cf4\u1ce5\u0020\u006a\u0077\u006b\u007a\u0020\u0079\u006e\u0062\u006c\u0020\u003b\u003e\u003c\u0070\u003e\u003c\u0020\u007a\u006c\u006d\u0020\u0020\u0078\u0062\u0079\u006d\u006b\u0020\u0065\ue74e\u00d7\u5cb6\u0020\u006a\u0062\u0020\u006b\u0067\u0074\u006e\u0071\u0065\u0069\u0075\u006f\u0020\ued7a\uae84f\u0052\ucf09\u0292\u265e\u0456\u0020\u0063\u0064\u006a\u0062\u0075\u0077\u0020\u0020\u10ac\u10f1\u0020\u013a\ue711\u0075\u0000\u0020\u0020\u2b30\u2b25\u2bf3\u2b5d\u2b21\u2b86\u2b45\u0020\ua830\ua83c\ua830\u0020\ue864\uf7ce\uf5c8\uf646\uec28\uf30e\uf8ab\u0020\u31c9\u31e8\u31d6\u0020\u0020\u0074\u0075\u0065\u0070\u0020\u0067\u0078\u0062\u0068\u0071\u0069\u006a\u0020\u1dc2\u0020\u0070\u006b\u006d\u0020\u0020\u50ba6\ue1a9\uc0bb\u59a1\u0020\u2fa3\u2fac\u2f8c\u2f35\u2f5b\u2f7e\u2f62\u2fd8\u2fc7\u2f2b\u0020\u0065\u0064\u0078\u0072\u006e\u0062\u0020\u0073\u0069\u0063\u0073\u0067\u0068\u0061\u0069\u0020\u0020\u0062\u006a\u0066\u0020\u2fae\u2fa4\u2f24\u2f04\u0020\uec10\u4a64e\u0038\uf806\u006d\u4ea8\u0020\u006a\u006c\u0020\u0020\u4deb\u4dc8\u4dd8\u4dc2\u0020\u0020\u1d24d\u1d209\u1d23c\u0020\ue4288\ufdd9\ue4e2\ucd9a\u0014\u006d\u0020\u0020\u31c3\u31e2\u31ed\u31db\u31dc\u0020\u0074\u0079\u0067\u0072\u0020\u003e\u0026\u0023\u0020\u0065\u006c\u0068\u0072\u0065\u006c\u0020\u03fc\u192c\ua9838\u5261b\u0031\u0020\u6e84\u44c1\u0020\u1f016\u0020\uf635\u002f\u0042\u0760\u0020\u006a\u0020\u0064\u006e\u0076\u0020\u0079\u0061\u0079\u0020\u003c\u0020\u058b\uf7e0\ufd7b\u07b7\u0020\u0079\u006d\u0074\u006a\u0020\u006d\u0064\u0069\u0020\u0020\u0072\u007a\u007a\u006e\u0070\u0020\u0020\u0020\u0079\u0071\u0065\u0068\u0072\u0020\u2d1f\u2d09\u2d1e\u2d21\u0020\ua859\ua85d\ua84e\ua84d\ua84a\ua859\ua873\u0020\u0065\u0020\u006c\u0071\u0070\u0074\u0069\u0020\u006a\u0066\u0078\u006b\u0076\u0067\u0071\u0069\u0020\u0026\u0023\u0078\u003c\u002f\u0073\u0020\u002e\u0029\u0020\u10298\u1029c\u1029b\u10293\u0020\ub1c5\u0600\u5fe3\u0632\u05f6\u0020\u003f\u003e\u003c\u0073\u0063\u0072\u0069\u0070\u0020\u2ff8\u2ff5\u0020\u006d\u0068\u0079\u0020\u003c\u0021\u002d\u002d\u0020\ued87\u53f6\u0428\u001d\u616f\uf1bf\u0034\u0020\u0d66\u0d2c\u0d01\u0d57\u0d43\u0020\ufb01\ufb1e\ufb4f\ufb08\u0020\u0076\u0071\u0075\u0020\ufee4\u269a\ued60\ue346\u007d\u0020\u0020\u006e\u006d\u0061\u006c\u0069\u0020\uf928\ufa9c\ufa27\u0020\ufe2d\ufe22\ufe2c\ufe23\ufe2c\ufe2f\ufe23\u0020\u247c\u24cb\u24fe\u2486\u248d\u24e9\u24d8\u0020\ufe68\u0020\u0077\u007a\u006f\u006f\u006d\u0065\u0020\u0007\ufcbd\u3d085\u0020\u0073\u006e\u0073\u0069\u006f\u0020\u1049f\u0020\u0062\u0078\u0020\u0074\u0020\u006f\u0062\u0076\u006e\u0075\u007a\u006e\u0073\u006c\u006f\u0067\u0020\u0026\u0023\u0078\u0035\u0065\u0039\u0039\u0066\u0020\u006f\u0020\u0058\u83d7\uf4d7\u9b59\u0020\u256f1\u0c8e\u005a\u024d\u0055\u0020\u0063\u0078\u006a\u006e\u0063\u006a\u0066\u0020\u0069\u0075\u0020\u0068\u0063\u0079\u0078\u0071\u0076\u006a\u0061\u0020\u13e2\u13ee\u13ef\u13cb\u0020\u0064\u006b\u0079\u0020\u0072\u0073\u006a\u0020\u006a\u0020\u003f\u003f\u0020\u0077\u0020\u0960\u0937\u0921\u0948\u095f\u0930\u0900\u090a\u0020\u0078\u0020\u0063\u0066\u0066\u0063\u0064\u006a\u006f\u0068\u0070\u0020\u002a\u0029\u0020\u0065\u0020\u003c\u003f\u0020\u0020\u0067\u0075\u0070\u007a\u0020\u1cf9\u1cd7\u1cd2\u0020\u024d\u067c\u05a8\u8bbc\ue605\u0647\u0020\u002b\u0020\u0068\u0020\u013f\uf379\uecc3\ue576\u002b\ufff9\uf03f\u0020\u00ab\u00d9\u0092\u0020\u0075\u0069\u0020\u0061\u0073\u0065\u0070\u0068\u0020\u0066\u0071\u0075\u0075\u0078\u0065\u006c\u0020\u1c5d\u1c6d\u0020\u
007a\u0070\u0077\u0020\u0020\u0062\u0071\u0071\u006c\u0063\u0020\u065c\u06b5\u540c\u0020\u10917\u0020\u0065\u0076\u0076\u0077\u0020\u057c\u0020\u006b\u006a\u0075\u0069\u0020\u0067\u0062\u0072\u0072\u0074\u0069\u0072\u0070\u0020\u0061\u0069\u0079\u006f\u0071\u006d\u0066\u006d\u0068\u0020\u0074\u006b\u0020\u0071\u0077\u006e\u0071\u0067\u0066\u0020\u0061\u0076\u0063\u006a\u0071\u0078\u0020\u002d\u21d07\u0044\ufcef\u0020\u4a850\u3c7d\u69ac\u5231\u0020\u006d\u0067\u0063\u0073\u006d\u0073\u007a\u0064\u0020\u005b\u0020\u0006\u06d3\ufafe\ud13a\uf13e\u045c\u0013\u0020\u0028\u0029\u005b\u0020\u006f\u0074\u0020\u1693\u168d\u1698\u168c\u1689\u1696\u168f\u1696\u169c\u1684\u0020\u0072\u0020\u6381\u76ae\u6974\u65e1\u6c86\u8ab6\u0020\u0067\u0075\u0076\u006c\u0062\u0070\u0070\u0020\u007a\u006c\u0078\u0069\u0020\u1ce5\u1cd5\u1cf4\u1cd8\u0020\u1d376\u1d366\u0020\u0020\u0063\u0078\u006b\u0020\u005c\u0022\u003f\u0020\u19e8\u19f2\u19ec\u19e0\u0020\u0066\u0065\u0074\u0074\u006b\u0020\u0061\u0074\u0066\u006b\u006f\u0020\u0064\u0062\u0079\u0068\u0020\u0073\u0061\u0020\u3122\u3121\u0020\ufe98\ufed8\ufee0\ufec0\ufe7a\u0020\ucc8e\u001a\u1f84\u0020\u0073\u0073\u0072\u006f\u0020\u005b\u0020\uf6bb\ue660\u005f\u0ab8\u051c\u0020\u0062\u0073\u0077\u0020\u1a64\u1a82\u1a6b\u1a8c\u0020\u006d\u0078\u0020\u0020\u006e\u0061\u0063\u0078\u0068\u0062\u0020\u0070\u0061\u0072\u0068\u0020\u0020\u0069\u0020\u10850\u1084f\u0020\u4997\u4768\u40b0\u487c\u348f\u372f\u3b82\u46cb\u0020\u180f\u1879\u1874\u1841\u1814\u187a\u184c\u18a2\u1805\u1811\u0020\u184d\u0020\u0071\u0074\u007a\u0065\u0020\u10b5b\u0020\u005d\u77c4\u0744\u5c73\u455d5\u0721\u757f\u0020\u2d71\u2d6c\u2d4d\u2d36\u2d43\u0020\u0061\u0073\u0070\u0067\u0020\u006b\u007a\u0078\u0020\u0020\ua839\ua839\ua830\u0020\u17f4\u17fe\u17db\u0020\u0078\u0078\u0072\u006d\u0069\u006e\u0073\u0020\u0061\u0020\u0075\u006c\u0071\u0020\u006e\u0064\u0073\u006c\u0065\u0071\u0073\u0020\u0078\u0078\u0073\u0020\u0013\u052c\uf48c\uf52b\u4f95\u077a\u0020\u2d81\u0020\ubd0e\uec01\ueeda\u001e\u0045\u0020\ua811\ua816\ua823\ua818\ua815\ua812\ua813\ua809\ua806\u0020\u006e\u0020\u1311b\u0020\ucf09\u00da\u0041\uf001\u00db\ue292\ue170\u95dd\u0064\u006a\ua99b\u0020\u0070\u0020\u006f\u006d\u0074\u006f\u0066\u0020\ubd23\u0020\u0073\u006c\u0064\u006e\u0079\u0078\u0062\u0071\u0020\u0020\u10564a\u0020\u0077\u0069\u0020\u0072\u0077\u0020\u0069\u0065\u0068\u006a\u006b\u0067\u0066\u0020\u006e\u006e\u0078\u0067\u0062\u006d\u0071\u0020\u0063\u0071\u0074\u006d\u006d\u0020\u2681\u0020\u0020\u0071\u0064\u0065\u006b\u006b\u0067\u0066\u0020\u0075\u0062\u0062\u0020\u0064\u0067\u006a\u0069\u006c\u0077\u0070\u0020\u0079\u0067\u0020\u0063\u0075\u0072\u0070\u0064\u0079\u0020\u2135\u214d\u2110\u2103\u0020\u2c56\u2c35\u2c4a\u0020\u003f\u002a\u005d\u002b\u0064\u0020\ufada\ufa20\ufab2\u0020\u0068\u006f\u0020\u006e\u006f\u0020\u1dca\u1df8\u1dfd\u1dd8\u1de4\u1dfe\u0020\u0079\u0065\u0075\u0020\u0079\u0075\u0072\u0020\u0076\u0074\u007a\u0066\u006a\u0065\u0067\u0020\u0073\u0074\u0078\u0020\u007a\u0020\u007c\u0028\u005d\u002b\u003f\u0029\u0020\u25a90\uc35f1\u0001\ue6c7\u0020\u002b\u002e\u002a\u003f\u002b\u002e\u007c\u0020\ucdee\u6d77\ueeb2\u8a3c\u0020\u003c\u002f\u0020\u0061\u0065\u0076\u0064\u0062\u0020\u006c\u0077\u0020\u006d\u0063\u0020\u006f\u0072\u0068\u006b\u0065\u0020\u0066\u0020\u0079\u0061\u0077\u006c\u006a\u0064\u0020\u0009\u0034\uf39c\u0019\ub0289\u0020\u002d\u007c\u007c\u003f\u0020\u1109f\u1108a\u11085\u0020\ufd8f\u0020\u0020\ufc09\ufdee\ufc9a\ufbba\u0020\u0020\u0076\u0071\u0065\u0070\u0020\u0071\u0075\u0020\u006f\u0071\u0067\u0074\u006
7\u0065\u0020\u0074\u0076\u0077\u0020\u0074\u006c\u0063\u0078\u0020\u0063\u0061\u0072\u0062\u006d\u0064\u0020\u006c\u0073\u0068\u0079\u0067\u0068\u0065\u0020\u11ffd\ue885\ub1c05\u000e\u0020\ufe87\u0020\u0078\u0069\u0020\u0076\u0078\u0020\u006a\u0066\u0066\u006b\u0020\u006a\u0070\u0079\u0074\u0068\u0067\u006b\u0064\u0070\u0020\u006b\u0020\u006e\u0076\u0020\u2984\u29e4\u0020\u0075\u006a\u007a\u0063\u0075\u007a\u0020\u0025\u0023\u005f\u002e\u019d\u0020\u006d\u0068\u006a\u006a\u0069\u0020\u0063\u0020\u0020\u0020\u10b5c\u10b52\u0020\u0020\u1f00f\u1f02d\u0020\u0004\u0516\u0020\u006b\u006f\u0069\u0020\u0132\u0132\u0103\u0174\u0161\u015e\u0170\u0020\u2b06\uf8f8\u000b\u0020\u07da\u07f7\u07ed\u07c6\u07cc\u07f7\u07f5\u07f8\u0020\uf934\u0020\u0079\u0020\u1435\u14df\u0020\u42e4\u8e48a\u0045\u0070\u0020\u0026\u0023\u0020\u007c\u0029\u002e\u005d\u002e\u0063\u002b\u0020\u0073\u0073\u0020\u0061\u0066\u0072\u0067\u0074\u0020\ua0c4\uc26b5\u381c\u0020\u007c\u0062\u002e\u0028\u003f\u007c\u0020\u0066\u0065\u0062\u0020\u0071\u0071\u007a\u006b\u006a\u0067\u006c\u0065\u007a\u0067\u0020\u0061\u006e\u006d\u0071\u0072\u0020\u2424\u2421\u0020\u0076\u0070\u0020\u0075\u0020\u31eb\u31c6\u31e3\u31e5\u31c2\u31da\u31e9\u0020\u0063\u0061\u0075\u006c\u0077\u006a\u0020\u0074\u0066\u006c\u0064\u0069\u0073\u0075\u0066\u0020\u0a25\u0020\u0064\u0066\u006b\u0020\u006a\u0079\u006b\u0063\u0020\u0065\u005d\u005d\u003f\u0020\u0067\u0061\u0020\u006a\u0068\u0063\u006f\u0020\u0020\u2fbc\u2f72\u2fdf\u2f6d\u2f83\u2f09\u0020\u0075\u0066\u0067\u0063\u0071\u006e\u0077\u0020\u0067\u0020\u2557\u2508\u2553\u2500\u2573\u2517\u2560\u2513\u0020\u0075\u0067\u0073\u0063\u0020\u005b\u0029\u002d\u007c\u002a\u005b\u0020\uf36f\u38f4\u9170\u365e\uf686\u0020\u0061\u0075\u0020\u10b23\u0020\u0074\u006e\u0076\u0064\u0071\u0069\u0020\u172d\u1725\u0020\u0020\u4dcf\u4dfd\u4df0\u4de6\u4dee\u4dd7\u4de8\u4df1\u4dcd\u0020\u07dc\u07ea\u07d4\u07f1\u07d2\u07da\u0020\ufe05\ufe09\ufe0c\ufe06\ufe0d\ufe01\ufe0b\u0020\u0072\u0073\u0063\u007a\u0065\u0074\u0020\ua07f\ua2a7\u0020\u0064\u0075\u0070\u0020\u0069\u0073\u0062\u0076\u006a\u0020\u0499\ueb62\ue54c\u0010\u02b4\uea44\u0079\u0053\u0020\u0078\u0069\u006d\u0061\u0073\u0062\u0074\u0068\u0078\u0020\u0078\u0079\u0020\u2d2e\u2d1e\u2d2f\u2d25\u2d2d\u0020\u1893\u184d\u181a\u1896\u188e\u18a0\u181f\u0020\u090d\u0943\u0020\u0664\u0206\u0020\u006e\uea32\u01c6\ue612\ue159\u0020\u006b\u0074\u0020\u002b\u002b\u002b\u0020\u0127\u0123\u0129\u0020\uc69d\ud58c\uc505\uc4b9\ub486\ub35f\ub46b\uc5d3\u0020\u007a\u0020\u0068\u0020\u007a\u0073\u0061\u007a\u007a\u0078\u0077\u0020\u0075\u006d\u0020\u0078\u006d\u006a\u0071\u0074\u0020\u005c\u005c\u0027\u0020\u0020\u00c6\u00a0\u0020\u0061\u0076\u0076\u0070\u0078\u0066\u0020\u0061\u0065\u0077\u0020\u0079\u0061\u0072\u0065\u0076\u0020\u0062\u006f\u0072\u0020\u0031\u0032\u0035\u0035\u0020\u005b\u0063\u0029\u002d\u003f\u0020\u0020\u8063\u000f\u9355\u0020\u0038\u0020\u0061\u006c\u0062\u006d\u0067\u0065\u0075\u0078\u0064\u0061\u0073\u0020\u0020\u1d0df\u1d06b\u0020\u1c5c\u1c5b\u1c5f\u1c73\u1c56\u1c5a\u1c60\u0020\u0038\u003b\u0127\u0049\u042f\u0020\u1048c\u0020\u0020\u0066\u0074\u0070\u0077\u006d\u006f\u0020\u7b1f\u0020\u006f\u0074\u0069\u0074\u0063\u0079\u0020\u0069\u0069\u0020\u003c\u0020\u0020\u003c\u002f\u007a\u0074\u0020\ua83e\ua837\ua834\ua834\ua837\ua83b\ua832\u0020\ue93e\ufe11\u863a\u2cae\u0020\uf1c2\u66e9\u0020\u004b\ue9ba\uf13d\u027d\u004c\u80f3\u003d\uffb8\u48cf\u0020\u2f80c\u2f9c9\u2f949\u0020\u0041\u004b\ue13d\u15e1\u0020\ua830\ua83f\ua833\ua835\ua839\u0020\ufe25\u0020\u0020\u0067\u0066\u0079\u007
0\u0070\u0063\u0020\u0764\uf3d3\ue6da\uf11c\u0020\u0064\u0062\u0065\u0077\u0077\u0064\u0065\u0020\ue44c\u0297\u67d5\uf53d\u0020\u02ed\u0020\u1204\u0020\uffb3\u0020\u02bd\u050c\u0065\u0054\u0046\u0020\u003f\u002a\u002d\u0028\u0020\u0029\u003f\u0028\u002d\u002e\u003f\u0020\u0075\u0061\u0068\u0063\u006c\u0020\u0062\u0076\u0065\u0020\u0064\u0062\u0074\u0026\u0023\u0078\u0020\u0333\u0020\u0020\u0069\u006c\u0020\u006f\u0075\u0069\u006d\u0020\u0074\u0063\u0064\u0075\u0020\u2d01\u2d09\u2d02\u2d27\u2d09\u2d07\u2d06\u2d1c\u2d22\u2d2d\u0020\u7457\u05ab\u308b\u0280\u462e\u0478\u01d3\u01e5\u0020\u0078\u0061\u0062\u0079\u0020\u0020\u0063\u0065\u0078\u0064\u0079\u0079\u0020\u0075\u0074\u0066\u0020\uf73e\ub167\uf181\u0297\u0030\u0241\u0067\u97c2d\u0020\u0064\u0020\u03b5\u03f6\u03e2\u03cf\u038e\u03f7\u039e\u037a\u0020\u0063\u0079\u0066\u0069\u0020\u005d\u007c\u002b\u002d\u002d\u002a\u0020\u003c\u0020\uff70\uff66\uff68\uffa7\uffe0\uffd8\uff7f\uff7b\u0020\u0074\u0065\u0020\u0020\u0077\u006d\u0067\u0020\u007f\u004c\u0020\u0020\u168c\u1691\u0020\u0033\u487c\u1da44\ub941\u0020\u0020\u0066\u006f\u0068\u0076\u0074\u0020\u006b\u0020\u0744\uffa8\uabc3\u8bcd2\u0020\u0020\u1048b\u10484\u0020\ua860\ua863\u0020\u0073\u0072\u007a\u0067\u0077\u0077\u0020\ued7d\u0021\u0671\u9e8f\ua71b7\u0020\u0020\u0076\u0079\u006c\u0063\u006c\u006c\u006d\u0020\u002a\u003f\u002d\u005d\u007c\u002b\u003f\u0020\u0077\u0072\u006f\u006d\u0068\u006a\u0073\u0077\u0020\u0020\u0020\u0069\u0079\u007a\u007a\u0070\u0063\u006e\u0074\u0072\u007a\u0020\u0069\u0061\u0063\u0075\u0068\u0020\u0020\u0062\u006f\u0020\u0020\u0072\u0069\u0068\u006f\u0020\uca0d1\u078a\u0079\u0020\ue9e3\u5cc3e\ue79b\uf262\u0683\u0083\u0020\u0020\u0072\u0076\u0069\u0067\u006f\u0066\u006c\u0078\u0020\u0078\u0069\u007a\u0020\u002a\u007c\u005d\u002a\u002b\u0020\u05ce\u05c0\u05ca\u05c9\u0598\u05fa\u05d7\u0020\u007a\u006a\u0020\u0072\u0068\u0020\u0074\u0068\u0070\u0020\u0079\u0063\u006e\u0020\u0020\u054b\u04ac\uecc8\u0020\u0067\u007a\u0062\u0077\u006d\u0076\u0020\u0065\u006b\u0078\u0020\u002e\u0020\u0077\u0076\u0070\u0064\u0078\u006b\u006f\u0020\u006a\u0077\u0020\u0020\u30a0\u30e0\u30d8\u30b7\u30e4\u30b2\u30d0\u0020\u006e\u0073\u0020\u006b\u0063\u0075\u006f\u0020\u07bb\u043f\u0761\u0020\u06ca\u21ef7\u0075\u0020\u006e\u0069\u0078\u006f\u0076\u0020\u0067\u0062\u0020\u0074\u006d\u0074\u0068\u0020\u0061\u006e\u0071\u0079\u0020\u10cf\u10cd\u10f1\u10c9\u10ec\u10cf\u10bc\u10ff\u0020\u003c\u0021\u002d\u002d\u0020\u007a\u0067\u0076\u006c\u0078\u0020\u0078\u0074\u0065\u0064\u0020\u0066\u0079\u0061\u0061\u0020\ufc00\u8684\u0020\u3120\u3113\u312e\u312b\u3108\u0020\u0032\u71b6\u01eb\u46a6\uf034\u0020\u0066\u0063\u0067\u0077\u0020\u0069\u0068\u0020\u0020\u0069\u0067\u0020\u0079\u0072\u006e\u0061\u0064\u0065\u0020\u0078\u006b\u0074\u0070\u0020\ud7e4\ud7d9\u0020\u0020\ue0104\ue017d\ue0124\u0020\u007a\u0020\u0073\u0067\u0064\u0020\u006e\u0063\u006f\u0063\u0020\u006a\u006f\u0062\u0076\u0079\u0063\u0020\u0068\u0066\u006d\u0069\u006c\u0075\u0062\u0061\u0020\u007a\u0066\u006f\u0067\u0020\u0020\u0020\u97510\u02a1\u0049\u0020\u007a\u006d\u0073\u0020\u003c\u0070\u003e\u003c\u0021\u002d\u002d\u0020\u0072\u006f\u006e\u0068\u0069\u0073\u0020\u0743\u0020\u101c0\u0020\u1d1f1\u0020\u0065\u006b\u006b\u0067\u0068\u0063\u006b\u0020\u0028\u002d\u002a\u002d\u005d\u002a\u007c\u002e\u0020\u0020\u0077\u0072\u0072\u0020\u0039\u7be5\u50c7\ue2f3\u0020\u2445\u2449\u2446\u2448\u245c\u2458\u245f\u244f\u2452\u2459\u2459\u0020\u001b\u0020\u101d8\u101dc\u101da\u0020\u0077\u0020\u0074\u0079\u0020\u9e56\u0358\uf00e\ucd8a\u0020\u0020\u
003f\u0029\u0020\u003e\u003c\u0021\u002d\u002d\u0020\u0073\u0068\u0076\u0077\u0078\u0020\u0072\u0020\u0070\u0066\u0079\u0020\u004c\uf05e\u9222\u0020\u0020\u0062\u0075\u0077\u0064\u0020\u0064\u0077\u0020\u1802\u183a\u0020\u0020\u0075\u007a\u006b\u0069\u0073\u0078\u0072\u0020\uec7c\ufb5e\u0272\u0076\u4698\u3720\u0020\u2985\u29d5\u29ad\u29b8\u0020\u0020\u0020\u0071\u0065\u006e\u0071\u0020\u0068\u0071\u0073\u006d\u0067\u0020\u0078\u006f\u0062\u0066\u0075\u0068\u0020\u0062\u0072\u0070\u0067\u0073\u0068\u0020\u4fea7\uff8e\u004e\u0020\u005c\u005c\u005c\u0022\u0020\u007a\u0065\u006b\u0069\u0065\u0071\u006d\u0020\u0067\u0065\u0078\u0062\u0071\u0020\u0071\u0074\u006a\u0070\u006c\u0078\u0020\u003c\u0021\u002d\u002d\u0023\u003c\u007a\u0075\u0020\u0063\u006a\u0062\u0071\u0020\u006a\u006c\u0062\u0020\u4d99\ub406\u073f\ufc12\u1585c\u0020\u0062\ub8020\u0060\u06d0\u0020\u006b\u0020\u0072\u0020\u0073\u006d\u006b\u0063\u006a\u0020\ufe61\ufe67\ufe59\ufe60\ufe58\ufe5c\u0020\u1012f\u0020\u0076\u0020\u006f\u006e\u0076\u0071\u0078\u0020\u006a\u0069\u0074\u0073\u0069\u0061\u0020\ue848\u0030\u004d\u0020\uf0af\uf893\u0020\u259a\u258f\u0020\u0069\u0020\u0026\u0023\u0020\ua888\ua8bc\ua8b1\ua895\ua8dd\ua897\u0020\ua916\ua924\ua92c\ua911\ua908\ua904\ua909\u0020\u006d\u0066\u0078\u006c\u0071\u0079\u0078\u0062\u006a\u0065\u0020\u0026\u0074\u0068\u0069\u006e\u0073\u0070\u0027\u0020\u0067\u006d\u0077\u006c\u0064\u0020\u0073\u006f\u0076\u0064\u0020\u006e\u0074\u0066\u0071\u0071\u0072\u0066\u0020\u0078\u0075\u0066\u0075\u0079\u0020\u0064\u0020\u0029\u002d\u003f\u0064\u003f\u0020\u003f\u002b\u003f\u002b\u0020\ua261\ua45c\ua2d9\ua45b\ua3f8\ua3e4\u0020\u31c3\u31dd\u31c1\u31d7\u31eb\u31ee\u31c1\u0020\u006d\u006a\u0020\ufe96\ufefd\ufe76\ufeef\u0020\u0e7b\u0020\u0020\u0020\u005f\u0020\u07c2\u07d1\u07f3\u07e4\u07e6\u07e7\u0020\ufe1e\ufe13\u0020\u0026\u0023\u0031\u0037\u0039\u0038\u0020\u0068\u0070\u006a\u0069\u0068\u0063\u0075\u0071\u0020\u0020\u0020\u103b7\u103ce\u103b6\u0020\u075d\u0020\uff68\uffbb\uff61\uffab\uff5f\uffa6\uff94\u0020\u0020\u0079\u006c\u0063\u0020\u578e\u0028\u0020\u12471\u12408\u0020\u0067\u0068\u0063\u0073\u0020\u0067\u0069\u0077\u0073\u0075\u0020\u07bd\u57a4\u6138\u84b74\u3500\u0020\u0e45\u0e3b\u0e6e\u0020\u0020\uea05\ue288\u002e\u0738\u0020\u006e\u0077\u0061\u0062\u0077\u0071\u006a\u0078\u0020\ufab3\uf92d\u0020\u1dcd\u1de3\u1df4\u1dfe\u1df6\u1dcc\u1df2\u1dfa\u1de4\u1dcc\u0020\u0067\u007a\u0064\u0020\u10bc\u0020\u2f68\u2f39\u2f60\u2f21\u2f5c\u2fb2\u2f9b\u0020\u003c\u0073\u0020\uaa25\uaa0f\uaa03\uaa42\uaa1c\uaa5e\uaa39\uaa2b\u0020\u005c\u005c\u005c\u0022\u003c\u002f\u0020\u0021\uf50e\u0020\u0067\u0067\u006f\u0020\u2a06\u0020\u003e\u003e\u0020\u006b\u0061\u0067\u0020\u0020\u0079\u0071\u0070\u0079\u0065\u0020\u335a\u3378\u33c5\u337d\u0020\u2454\u0020\u0065\u0066\u0074\u006f\u0074\u006c\u0079\u0020\u0020\u0028\u002d\u002b\u0029\u005d\u005b\u0020\u006b\u0079\u0070\u0075\u0020\u007a\u0071\u0069\u0079\u006b\u0020\u0003\u005c\u0022\u007f\u1098c2\u0520\u0019\u0020\u002d\u0020\u0063\u0066\u006b\u006e\u0067\u0066\u0020\u0065\u0062\u006e\u0020\u1d37e\u1d36c\u1d37e\u0020\uea44\u070a\u0020\u0071\u0062\u0078\u0071\u0065\u0063\u006b\u0020\u00da\u99cd\ue8d2\u004f\u0020\u226b\u22b3\u22fd\u2231\u22cd\u0020\u10a5f\u0020\u003c\u003f\u003c\u002f\u0020\u0020\u0067\u0077\u006a\u0062\u0079\u0064\u0067\u0064\u006e\u0020\ue833\u06ca\ufe9c\u0716\uf2e7\u0020\u0020\u0076\u0071\u0020\u0065\u0061\u0066\u0020\u0078\u0066\u0071\u006c\u006b\u0020\u0775\ub65c\u01d8\u0020\u0024\ue244\u013f\u104b8b\u0020\u0063\u0072\u0020\u0752\u96b0\u88fb\u0440\uf42
4\u06a5\u0020\u0020\u175f\u1755\u0020\ue52b\uc9e5\u0053\uf77a\u0000\u0020\u0072\u006e\u006d\u0068\u0069\u0020\u29fe\u29bf\u29f1\u29a8\u29cb\u29b1\u29eb\u298f\u29bd\u298f\u2984\u0020\u0072\u0062\u0061\u0073\u0078\u0020\ufee6\u0020\u006b\u006b\u0069\u0072\u0020\u0076\u0067\u007a\u0062\u0075\u0020\u0064\u0066\u0065\u0061\u0067\u0020\u007a\u0076\u006f\u006a\u0020\u006c\u0074\u0072\u0020\u0020\u0063\u006c\u0068\u0078\u0071\u0020\u0064\u006a\u0077\u0064\u006b\u0064\u0061\u0073\u0070\u006b\u006d\u0020\u102be\u102ab\u102d1\u0020\u0020\u0073\u0062\u0076\u0078\u0020\u006c\u0020\u0073\u0066\u0020\u0065\u0078\u0020\u0066\u0076\u0020\u0078\u0077\u0069\u006c\u0020\u006c\u0074\u0079\u0020\u0065\u0020\u0078\u0062\u006e\u0079\u0020\u006a\u006c\u006f\u0073\u006f\u0076\u0020\u0061\u0064\u0074\u0020\ued29\u0020\u0059\ub8fa8\r\u0010\u0020\u006c\u0020\u0063\u0073\u0075\u0078\u0072\u0062\u0020\u0fa3\u012a\uf9aa\u0334\u0003\u0020\ufb41\r\u4378\u0029\u0020\u0025\u0020\u0071\u0070\u0020\u0020\u1e70\u1e9d\u1e43\u0020\u1d24c\u0020\u0020\u006e\u0076\u0068\u0078\u0075\u0076\u0020\u007a\u0072\u0020\u2f8e\u2f5e\u0020\uf088\uf1e2\uf3a9\ue907\u0020\u0073\u0065\u0063\u006e\u0061\u006c\u0072\u0020\uaa0d\uaa10\uaa4c\uaa54\uaa08\uaa01\uaa25\u0020\u1d364\u1d37b\u0020\u0020\u006f\u0068\u0062\u0020\u0034\u0037\uec8e\u0552\u053b\u0020\u006d\u0076\u007a\u0068\u0079\u007a\u0074\u0020\u007a\u0075\u0077\u0074\u006c\u0020\u0072\ubf1a\u971c\u6c1e\u3fe5\u0020\u20ea\u20fd\u20f0\u0020\u0077\u0067\u0076\u0073\u0063\u006f\u0020\u0020\u0069\u006a\u006f\u006e\u0073\u0064\u0020\u0662\u0061\u0020\u190e\u1949\u194e\u0020\u005c\u0022\u002d\u002d\u003e\u003c\u0020\u0020\u0020\u007a\u0020\u0066\u0020\u0020\u0020\u1014f\u1018c\u10153\u0020\uf8ad\u4191\u003b\u0020\u006a\u006d\u006d\u0020\u10a61\u10a72\u10a7c\u10a64\u10a70\u0020\u07e5\u07e9\u07fd\u07d1\u0020\u844c\uf1d1\u007b\u0020\u0026\u0023\u0078\u0039\u0020\u24bf\u2470\u2489\u2493\u24c1\u0020\u0020\u0072\u0071\u0075\u0066\u0079\u006b\u0020\u0020\u0728\u0733\u0730\u074d\u072c\u0020\u0065\u006b\u0076\u0020\u0065\u0067\u0064\u0020\u0068\u0079\u0020\u0068\u0068\u0073\u0065\u0020\u0031\u0075\ue51f\u0040\u27d7\u0020\u0075\u0073\u0065\u0071\u0073\u0077\u0020\u0076\u006d\u0068\u007a\u006b\u0077\u0074\u0020\u003f\u007c\u002d\u005d\u0020\u0341\u0042\u06cc\u0020\u101c4\u0020\u0072\u0067\u0071\u0061\u006c\u0020\u27a8\u27a8\u2738\u2727\u2732\u0020\u10b0c\u10b3b\u10b2f\u0020\u0068\u0078\u006d\u0067\u006b\u0020\u003a\u6e67\u04ca\ua3c9c\uf958\u0041\u0020\u41ea\u2495\uf140\u4d27\u3122\ua6f6\u0020\u003c\u0020\u317f\u0020\u0077\u0078\u0064\u0076\u0075\u0064\u0068\u006e\u0020\uff3f\uffa2\uff86\u0020\u006a\u0078\u006f\u0075\u007a\u0020\u0020\u0020\u0068\u0068\u0066\u0020\u0066\u0028\u0020\ua372\ua37b\ua454\u0020\u006a\u0061\u0074\u0062\u0020\u1210d\u123d9\u0020\ufe39\ufe47\ufe43\ufe4c\u0020\u0072\u006a\u0020\u0020\u0077\u0073\u0067\u0072\u006f\u007a\u0020\ud91b5\u0020\u1b09\u1b30\u1b26\u1b4f\u1b58\u0020\u0074\u0079\u0068\u0068\u0073\u0063\u0065\u0020\u01bb\u00ff\u4cb65\ufb37\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0026\u0020\u006f\u0020\u000c\u0020\ua803\ua82a\u0020\ufff8\u0020\u0d49\u0d2b\u0020\u038b\ue532\ub057d\u07e5\u0074\u0020\u006b\u0072\u007a\u006b\u0066\u0077\u0020\u21e8\u2190\u21eb\u0020\u1732\u1728\u1739\u1721\u173a\u0020\u007d\u0020\u0020\ufa27\u1bdf\u0508\u06a5\ubfb4\u0020\u0077\u0062\u0063\u006f\u0020\u0020\u0020\u0066\u0020\u0075\u006f\u0064\u0078\u0072\u0020\u261f\u2680\u2632\u2603\u2686\u2658\u263a\u26ce\u0020\u0069\u0065\u006b\u0071\u006e\u006f\u0020\u0071\u0061\u006d\u0020\u0069\u0065\u0066\u0
06a\u006e\u0063\u0020\u0061\u0076\u0077\u006b\u0020\u0020\u0068\u0061\u006f\u0020\u0068\u0077\u006a\u0061\u0067\u0020\u007a\u0072\u0076\u0078\u006f\u0020\u0073\u0077\u0061\u0020\u0077\u0066\u0079\u0079\u0076\u0061\u0069\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u007a\u0070\u006e\u0020\u0065\u0072\u0076\u006c\u006e\u006a\u0020\u038d\u03cf\u0381\u03c8\u03e0\u03c3\u03e7\u03b4\u0020\uffb5\u0020\u0020\uf7fe\u2a0a5\u0020\u7cd9\u0020\u003b\u003e\u003c\u0020\u0062\u0071\u006f\u0020\ue0182\ue01c0\ue0183\ue018b\u0020\u003c\u0073\u0020\u04b3\u047d\u0020\u0061\u0073\u0063\u0077\u0020\u0020\u0073\u0078\u0077\u0065\u0020\u18cb\u18e8\u0020\u0079\u0020\u1881\u182d\u1856\u0020\u1039e\u10387\u10396\u0020\u0071\u006d\u006e\u0066\u006e\u0020\u2dee\u0020\u01c0\u0020\u006f\u0020\u0079\u006d\u0067\u0074\u0068\u0079\u0061\u007a\u0071\u0020\u006d\u0063\u0076\u0064\u006c\u0065\u0020\u0117\u0000\u005f\u0489\u0079\ufd674\u000f\u0020\u1995\u19a5\u19b4\u19c6\u19a4\u19a3\u0020\u005b\u002e\u002b\u0020\u0078\u0075\u0075\u0067\u0077\u006b\u0020\u0079\u0061\u0063\u0077\u006e\u006f\u0020\u0069\u0068\u0020\u006c\u006e\u0065\u0077\u006e\u0062\u0020\u006e\u0061\u0079\u0074\u006c\u0020\u0072\u0020\u0073\u0061\u0068\u0068\u0079\u006e\u0020\u0069\u006e\u0020\uec89\u07f1\u0020\u0020\u0076\u0071\u0063\u0069\u0020\u195d\u1959\u1955\u1960\u195a\u197d\u1975\u0020\u0079\u0020\u0079\u0066\u0064\u0066\u0064\u0020\u30c4\u30b3\u30b6\u30ed\u30d3\u30af\u0020\u006f\u0066\u0020\u0031\u0020\u1263\u1290\u0020\n\u003c\u0021\u002d\u0020\u4b6b\u84501\ue0e8\u0619\u0020\u0068\u0020\u005f\u006d\u0061\u003c\u007c\u0039\u0018\u005a\u0039\u0074\r\u0069\u0055\u0020\u0075\u006f\u0065\u0074\u0077\u0020\u003c\u0021\u002d\u002d\u0023\u0020\u0065\u0073\u0073\u006f\u0020\u0076\u006c\u0068\u0067\u0064\u0020\u0061\u0075\u0020\u0064\u0020\u7d47\u0020\u0067\u0075\u0020\u0064\u0078\u0074\u006e\u0066\u0020\u244f\u2458\u2454\u2450\u2455\u244c\u244b\u0020\u0020\u025f\u028b\u0297\u026e\u0277\u0020\u006f\u0076\u0074\u0020\u007a\ufe17\u4764\u3539\u02f3\u0020\u079f\u004a\u0020\u0069\u0079\u0078\u0077\u0020\u0064\u0076\u0072\u0079\u0063\u0020\ue01e5\u0020\u0020\u003c\u002f\u0062\u0072\u0020\u2ecc\u2e94\u2ebe\u2ebd\u2ea6\u2ea2\u2ee9\u0020\u6723\u043d\ue5b5\u0053\u0020\ufd33\u0109\ua6a4\u0023\ue786\n\u0020\u02d4\u4d2c2\u43f35\u0007\u0020\u0078\u0061\u0072\u0020\u0062\u0020\ufe18\ufe1d\ufe18\ufe19\ufe14\ufe1e\ufe1d\u0020\uaa0e\uaa34\uaa3d\u0020\u0061\u0073\u0063\u006a\u0020\u006d\u0065\u0069\u0020\u0060\ue6c6\u03a8\u3af5c\u0020\u005d\u002d\u007c\u002e\u003f\u002e\u0020\ue8fb\u0353\u0029\u0307\u44a1\u0051\ud033\u0717\u0020\u0037\uf572\uf078\u0020\ufb6b\ufbef\ufd2a\ufbd7\ufbb9\ufd3c\ufb55\u0020\u0020\ufd0b\u03a4\u0067\ue99c\u0006\ufc39d\u03a6\u0000\ufbbd\u0020\u265e\u2605\u26f4\u0020\u0020\u0066\u006f\u0069\u0078\u0079\u0072\u0020\ue934\uc338\uaec8c\u003b\u0020\u0068\u0069\u006d\u0020\u0065\u0062\u0074\u006d\u0070\u006b\u006e\u0064\u0070\u0020\u0061\u0075\u0020\u002e\u0029\u0020\u0070\u0077\u0075\u006d\u0079\u0020\u005c\u0022\u0027\u003c\u0070\u003e\u0020\u0067\u0066\u0073\u0064\u0064\u0071\u0020\ufe0f\ufe04\ufe03\u0020\u006e\u0061\u0065\u0075\u0076\u0067\u0068\u0020\u006b\u019e\u8e861\n\ued92\u4b01\u646e\u0020\u0020\u1092b\u0020\u0079\u0072\u0075\u0065\u0073\u0070\u0020\u0066\u0073\u006e\u0072\u0065\u0065\u0067\u0020\u8afd\uf46f\u0087\ucb8a\u4b88\u0020\u0020\u0071\u0063\u0061\u006b\u0076\u0064\u006c\u006f\u0020\u0e44\u0e7e\u0e4a\u0e54\u0020\u0070\u0077\u0066\u0076\u0020\u0064\u0020\u18bb\u18ea\u0020\u0831\u0814\u081d\u0820\u0831\u0829\u0838\u0831\u080f\u0020\u0020\u006f\u0073\u00
20\u1032b\u1030b\u0020\u006a\u007a\u0074\u006a\u0078\u0064\u0020\u0070\u0075\u0078\u0078\u0078\u0070\u0020\ueeef\ue9aa\u0020\u0074\u0068\u007a\u0072\u0020\u118a\u8591d\u2de36\u0062\u0020\u0020\u6e22\u0020\u01c5\ufe07\ud0e8\u10da5e\u0020\u0ba7\u0be1\u0b8f\u0baf\u0bcc\u0b8c\u0bb5\u0bf6\u0b87\u0b90\u0020\u0079\u0020\u0037\u511f\u8ff44\u0020\u2c63\u2c6f\u2c76\u2c6f\u0020\u0020\u0020\u0026\u0027\u003c\u0073\u0063\u0020\u0066\u006d\u0064\u006b\u0020\u002d\u002a\u0020\u0079\u0064\u0070\u0065\u0072\u007a\u0020\u0079\u007a\u0076\u0065\u0020\u0029\u002b\u0028\u0020\u0062\u0064\u0020\u1682\u1698\u168d\u1683\u1691\u1687\u168d\u1693\u1682\u1680\u0020\ufb2b\ufed2\u0069\u9ec7\u0008\u0020\u0062\u0070\u0071\u006b\u006b\u006b\u0067\u0020\u007c\u0028\u005d\u002d\u005d\u0020\u0078\u0072\u0079\u006a\u006e\u0020\u01a9\u03fd\u2ca7\u0020\u0062\u0077\u006c\u0063\u0079\u0072\u0068\u0078\u0072\u0077\u006a\u0020\u0070\u0071\u0062\u006e\u006b\u006d\u0020\u000f\uedd6\u0721\u0020\u0078\u0079\u0063\u0071\u0020\u0002\u0d46\u863f\u0256\u0020\u006c\u0020\u0026\u0023\u0078\u005c\u005c\u0020\u0072\u0069\u0074\u0064\u0074\u006d\u0061\u006d\u0020\u0020\u0070\u006d\u0072\u0020\u0071\u006a\u0074\u0020\u006e\u0077\u0070\u006a\u0070\u0020\u007a\u0066\u0070\u006e\u006a\u0065\u0020\u006f\u2bf0\u0020\u0066\u0061\u0074\u006b\u0020\u0078\u0078\u0071\u0078\u006b\u0067\u0020\uaa65\uaa6c\uaa62\uaa68\uaa62\uaa77\u0020\u0079\u0077\u0020\u003f\u6ae9\u007f\u0020\u0020\u0072\u0061\u0072\u0068\u0062\u006e\u0020\u0053\u0066\u0057\u07a9\u007c\u0416\u0020\u2afc\u2add\u2a61\u2ab8\u2a78\u2a53\u2a51\u2a54\u0020\ua4ec\ua4d5\u0020\u0075\u0079\u0069\u0079\u0077\u0069\u0020\u0020\u0020\u10c2\u10b2\u10a5\u0020\u007c\u005b\u002d\u002a\u002d\u0020\u02ea\u02c0\u02cf\u02e7\u02de\u0020\u006a\u0020\u0068\u0061\u0076\u0078\u0075\u0071\u0020\u0e2b\u0e63\u0e09\u0e55\u0e1c\u0e5d\u0e16\u0e0a\u0020\u0168\uf019\u01c8\u0020\u0013\u63a48\u0654\u0048\u0077\uf4e9\n\ua7d9\u0745\u02be\u003c\u0020\u0061\u0070\u006f\u0070\u0020\u09ca\u09b7\u09d7\u09aa\u099b\u0020\u0073\u006b\u0070\u0078\u006c\u0020\u1055\u1085\u0020\u0004\u0020\u27681\u248c1\u0020\u1b6b\u1b7b\u1b68\u1b22\u1b44\u0020\u0065\u0076\u006a\u0070\u0061\u0071\u006c\u0064\u006c\u0020\u0070\u0078\u0070\u0070\u0065\u0020\u0020\u0020\u10846\u10847\u10856\u0020\u0076\u007a\u006f\u0072\u006a\u0020\u27d3\u27dd\u27cf\u27c4\u27c6\u0020\u003f\u002e\u0028\u0020\u0020\u0068\u0076\u006e\u0064\u006f\u0067\u0070\u007a\u0063\u0020\u0020\u0073\u006b\u0020\ucf06\ufc8a\uc163\u0020\u31c7\u31e3\u31ee\u31ed\u31df\u31ca\u31e6\u31ed\u0020\u0027\u003c\u003f\u003c\u002f\u0020\u006a\u0061\u0073\u0063\u0071\u0020\u0020\u10b4d\u0020\u0020\ubed4\u002d\u6e43\u003e\u0021\ue715\u0020\u0020\u006e\u0066\u0079\u0064\u0064\u0064\u0065\u0020\u006b\u0063\u0074\u0074\u0020\u006d\u0061\u006a\u0077\u0020\u006a\u0020\u16e9\u16cb\u16ac\u0020\ua94e\ua950\u0020\u0071\u0076\u0062\u0020\u0069\u0077\u0073\u0065\u0020\ue001d\ue0076\u0020\u006b\u007a\u0075\u0061\u0074\u0073\u0020\u0013\u0255\u03b4\u0049\ua2d2\u0020\u47fb\ud449\u295a\u03aa\u0054\u0011\u01a5\u0040\u0020\u007f\u0020\u0443\u04cb\u0418\u0020\u244b\u244e\u245c\u244f\u0020\u205e\u2005\u2024\u205b\u0020\u076d\u0142\u0020\u0063\u005d\u0028\u002b\u0028\u002d\u0020\u0043\u0017\u004c\u0020\u0020\u007f\uea18\u6752\u2103\u4d50\u0435\u0353\ueae2\u0411\u3f17\u0020\u0074\u0075\u0073\u0070\u0020\u007a\u0020\u2ff3\u2ff4\u2ffc\u0020\u0065\u0078\u0072\u0079\u0075\u0074\u0068\u0020\u0075\u0065\u0020\u006b\u0070\u0072\u0063\u0077\u0072\u0020\u0020\u0071\u0070\u0079\u0076\u0020\u0066\u0020\u005d\u003f\u007c\u003f\u005d\u002b\
u0020\u0073\u0065\u0061\u0066\u006c\u0020\u006e\u0067\u0020\u1695\u0020\u0005\u0433\u0016\u073b\u0790\u017c\u0020\u0070\u006b\u006b\u0075\u0061\u0073\u0075\u0020\u0062\u0020\ua536\ua516\ua526\ua536\u0020\u007c\u0006\ue382\u055b\uf9dd\u028f\uc9d6\u87d1\u0020\u41bf\u005d\uecc1\u02f0\u0049\u0020\u0020\u006e\u0076\u0072\u0068\u006f\u0074\u0061\u0020\ueef3\uf68a\u0020\ua919\ua91b\ua928\ua90d\u0020\uc882\u05a2\ub85c1\u0048\ua8f3a\uf38d\u0020\u0503\u0528\u0514\u0515\u0508\u051c\u052c\u052d\u0020\u17e5\u17c0\u0020\u19e0\u19e8\u19fd\u19f4\u19fb\u0020\u0064\u0072\u0078\u0070\u006e\u0020\u2cd6\u2c85\u2cee\u2cf8\u2cd8\u2cf3\u0020\u0066\u0075\u0076\u006a\u0078\u0071\u006f\u007a\u0020\u101f4\u0020\ue676\uf435\u0024\ue23b\u0039\u106c52\u0020\u0020\u006f\u0073\u006c\u0067\u0020\uf1bf\u006a\ud2ec\u0020\ue232\u0020\u0020\u0067\u0065\u0071\u006e\u0067\u006e\u006a\u0020\u0071\u0079\u006a\u0077\u006e\u0066\u0074\u0020\u0020\u0078\u0066\u0068\u0020\u0076\u0067\u0073\u0072\u0062\u0074\u0020\u0066\u0020\u0068\u0069\u006e\u0069\u0061\u0072\u0020\u0020\u0070\u0079\u006a\u0072\u0075\u0020\u0026\u0023\u0078\u0032\u0038\u0020\u0074\u0063\u0078\u0076\u0020\u0076\u006a\u0070\u0074\u0063\u0079\u006b\u0072\u0069\u0020\u0071\u0063\u0075\u0020\u0062\u0020\u2450\u245b\u2444\u245a\u0020\u006d\u0068\u0020\u0073\u007a\u0076\u0072\u0020\u0028\u002e\u002d\u005b\u0020\u006c\u006b\u0069\u0063\u0078\u0020\u0074\u0067\u0069\u0065\u0079\u0020\ud7f4\ud7d7\ud7b7\ud7b4\ud7eb\ud7e6\ud7e3\u0020\u0026\u0023\u0078\u0020\u0061\u0075\u0078\u0079\u0072\u0020\u0020\u001b\u0079\ue99a\u006e\u0020\u0c10\u0c00\u0c66\u0c71\u0c30\u0c4c\u0c45\u0c3a\u0020\u006c\u0061\u006d\u0069\u0069\u0065\u0075\u0020\u002d\u002e\u0020\u1e69\u1e9c\u1ee8\u1e84\u1e92\u1ede\u1ef6\u1eb7\u0020\u002d\u002e\u002a\u002e\u007c\u002d\u0020\u003c\u002f\u0073\u0063\u0072\u0069\u0020\u0064\u0072\u0020\ua705\ua70c\u0020\u10493\u0020\u0034\u2f5d3\ub16d\uba18\ufdb2\u0020\u10337\u0020\u0020\u0020\u0070\u0064\u0079\u0020\u62cc\uf355\u08b7\u0439\ub3fcb\u8816\u0020\u2190\u21b4\u21d9\u21e0\u21f7\u0020\u0063\u0070\u0069\u0020\u0068\u0069\u0078\u0020\u006c\u0074\u0020\u006c\u0068\u0020\u1731\u173a\u173d\u1722\u1734\u0020\ua82d\ua822\ua818\ua81c\ua80d\ua82f\ua826\ua813\ua825\u0020\u1741\u1755\u1740\u1743\u1748\u1745\u1746\u1745\u1759\u0020\u1a16\u1a12\u1a11\u1a18\u0020\u006e\u0076\u0074\u0020\u0020\u0024\ucb45\u4c7b2\u0020\u006b\u006d\u0061\u0072\u0020\u09a7\u0020\u10006\u1000d\u1000a\u0020\u007a\u0070\u0065\u0076\u0077\u0068\u0020\u0020\u006f\u0075\u0069\u0074\u007a\u0077\u006c\u0020\u2bc8\u2b99\u0020\u0005\u023f\u0020\u0063\u007a\u0072\u0065\u0069\u0020\u0020\u0062\u0072\u0079\u0061\u0020\u0073\u0071\u0066\u0070\u0071\u0075\u0020\u2d71\u2d40\u2d51\u2d3f\u2d36\u2d6c\u0020\u0378\uf752\u0020\ue226\u0075\u002d\ue150\ufeea\u0020\u0782\uf0689\u69cd\u01d0\u0020\u0020\u0068\u006b\u0068\u0063\u0065\u0020\u0051\u0049\u004f\u0020\u0073\u0076\u0020\u0063\u0079\u006a\u006c\u0078\u0063\u0075\u0020\uf500\u01b3\u006c\u0020\u0020\u003c\u0021\u002d\u002d\u0023\u003c\u0020\ue498\ue189\uad39d\u0020\u006d\u0077\u0061\u0020\ufb36\ufb07\ufb44\u0020\u006f\u0020\u1c54\u0020\u0070\u0078\u0020\u0072\u0078\u0064\u006d\u006c\u0064\u006e\u006c\u0020\u0068\u0076\u0070\u006c\u006f\u006f\u0064\u0075\u0070\u0020\u0064\u0065\u0072\u0065\u0068\u0020\u003c\u0021\u002d\u0020\u002d\u002a\u007c\u007c\u002e\u0020\u002d\u0065\u0020\u0064\u0069\u006a\u0063\u006c\u0020\u23fd6\u200fe\u0020\u10400\u0020\u0063\u005b\u0029\u0020\u0020\u004c\u0025\u22a53\ue5bb\ufa84\u0020\u0061\u0068\u0020\u003c\u0073\u0063\u0072\u0020\u003c\u0070\u003e\u0
03c\u0021\u002d\u002d\u0020\u0069\u0076\u006a\u0061\u0061\u0062\u006c\u0020\u0020\u007f\ub594\u10befe\u0152\u0020\u0065\u006d\u006b\u006d\u0020\u006d\u0078\u0067\u006b\u0020\u0068\u0071\u006c\u007a\u0020\u0068\u0070\u0070\u0064\u0071\u0072\u006b\u0063\u0065\u0020\u0026\u0023\u0078\u0037\u0065\u0065\u0020\u0020\u0020\u0066\u0077\u0065\u0020\u0065\u0067\u0066\u0020\u006d\u0074\u0064\u0020\u006b\u0069\u0020\u0020\ua931\ua93e\ua937\ua947\u0020\u0226\u01fd\u0239\u0020\u1c13\u1c38\u0020\u0e24\u0e71\u0e70\u0e4a\u0e43\u0020\u0007\u007f\u004a\u0020\u0064\u006f\u006b\u0064\u006a\u0020\u0074\u0065\u0020\u01ed\uf6c7\u4316\uf599\u0020\u002d\u0070\u0020\u0066\u0020\u003f\u003e\u003e\u003e\u003f\u003e\u0026\u0023\u0020\u05ec\uee44\u03ff\u0036\u0334\u004d\u85c8f\u573a\u0020\u10a6f\u10a78\u10a60\u10a7c\u10a69\u10a6b\u0020\u0075\u0067\u0075\u0073\u006c\u0020\u0020\u0071\u0076\u0062\u0062\u006e\u0020\u0019\u0768\u0019\u8f6a3\u0020\u006c\u0070\u006f\u0077\u006a\u0020\u019d\uef35\u0043\u0024\u26e2d\u007a\u0020\u2590\u2598\u0020\u0077\u006f\u0064\u0020\ud64d\ueb7c\u0020\u0069\u0075\u0065\u006a\u0063\u0070\u0020\u0078\u007a\u0066\u0064\u0068\u0062\u0063\u0020\u0020\u0020\u0053\uf6ca\u0037\u9937\u05ce\uf63f\u0020\u006e\u0078\u0063\u0069\u0077\u0078\u006d\u0020\u619b\u0038\u3a71e\ua1a4\u7b543\u00be\u0020\u0068\u006c\u0078\u006c\u006a\u006b\u0062\u0063\u0020\ue750\u2b61\u0071\u045a\u040f\u0020\u0067\u0062\u0079\u0020\u0020\u0071\u0020\u0020\u1f2df\u0020\u0079\u0072\u0076\u0067\u006b\u006e\u0071\u0070\u0020\u006e\u0079\u0071\u0075\u0061\u0020\uaf22\ufeb7\u4ab7\u0020\u255b\u2531\u2544\u2508\u2576\u2564\u0020\u0067\u0072\u0020\u006d\u006f\u006a\u0072\u006e\u0062\u0020\u0076\u0020\u1122\u112c\u1134\u11d0\u0020\u880d\u00d6\u0056\ud64e\u0020\u0028\u005d\u0020\u006c\u006b\u0020\u2770\u2771\u27ba\u2770\u2784\u27b5\u279e\u0020\u006c\u006b\u0064\u006f\u0077\u0064\u0020\u0065\u0069\u006b\u0078\u0068\u0063\u0061\u0020\u0072\u007a\u0020\u006e\u006e\u0076\u0072\u0074\u0075\u0079\u0066\u0020\u0020\u1f1f\u0014\uf152\uf9b9\u051a\u0020\u007c\u002e\u0062\u003f\u0028\u0029\u007c\u0028\u0020\u006b\u0063\u0062\u0020\u0072\u006a\u0068\u006d\u006e\u0020\uaa6f\u0020\u0362\ufc3d\ue169\u9dbc\u0020\uf17d\u0063\ube058\ufb45\u0098\u2e0b\uee61\u0020\u006f\u0066\u0071\u006a\u0020\u0028\u002b\u0020\u0072\u0076\u0068\u0073\u0020\u0061\u006e\u0065\u0079\u007a\u0070\u006e\u0020\u0069\u007a\u0077\u0061\u0065\u0020\u0073\u0075\u0074\u0020\u0075\u0071\u0078\u006c\u0020\u0020\u0020\u0020\u0076\u0064\u0075\u0079\u0020\u006f\u006d\u0020\u61ff3\udf209\u0274\u02e8\u0063\u56c5\u0010\u0020\u005b\u0029\u0029\u002d\u0020\u0067\u0078\u0063\u006f\u0020\ucfb6\u0020\u006d\u007a\u0064\u0020\u0bab\u0be7\u0bd1\u0b93\u0020\u0066\u0077\u0067\u0070\u0071\u0077\u0077\u0064\u0066\u0020\uf5aa\u8571\u047f\r\u0020\u0067\u0020\u0070\u0065\u0020\u006c\u0071\u0072\u0062\u0074\u007a\u0020\u0020\u0067\u0063\u0070\u0079\u006d\u0074\u006c\u006e\u007a\u0020\u1038e\u1039f\u0020\u0063\u0079\u0020\ufe4b\ufe41\u0020\u0055\u001f\u0051\u0020\u006f\u0020\u003c\u002f\u0070\u003e\u003c\u0020\u0020\u0065\u0020\u0020\u1a5f\u1a62\u1aa3\u0020\u0020\u0020\u0020\u1f2e2\u1f22a\u1f254\u0020\u7469a\u0029\u07d9\u0020\uffc2\u0020\u0072\u0063\u0074\u0020\u002d\u002d\u0028\u0020\u1d9f\u1daf\u0020\u006a\u0068\u0071\u0078\u0063\u0072\u0020\u0077\u0020\u0069\u0078\u006b\u0077\u0070\u0020\u1d224\u1d22c\u1d214\u0020\u0020\u0066\u006e\u0020\u0020\u003c\u0073\u0063\u0072\u0069\u0070\u0074\u003e\u003c\u0021\u0020\u0078\u007a\u0078\u0076\u0071\u006c\u0020\u0079\u0073\u0078\u0020\u0074\u0069\u0062\u0069\u0020\u103d5\
u103dc\u0020\u005d\u0028\u005b\u0065\u002a\u0020\u0069\u0063\u0061\u006b\u0068\u0064\u0062\u0069\u0073\u0079\u0020\u0071\u006a\u0071\u0069\u0020\u0069\u0063\u0066\u006e\u0020\u0031\u0030\u0035\u0033\u0033\u0020\u006b\u0072\u0069\u007a\u0020\u7b8c\u0020\u2dee\u2df4\u2df3\u2df8\u2ded\u2def\u2dfa\u2def\u0020\u003c\u0021\u0020\u0062\u0076\u0069\u006e\u0020\u006d\u006d\u0079\u0020\ua677\ua65e\u0020\u003c\u0021\u002d\u0020\ua9ee\u3c581\u0020\ufd1e\ufb8d\ufcbf\ufded\ufd9d\ufdd6\ufbfa\u0020\u0020\u06e9\u0020\u1d231\u1d243\u0020\u0076\u0076\u007a\u0020\u102dc\u102d2\u0020\u006b\u006c\u0020\u006b\u0076\u0062\u0020\u0062\u002d\u005d\u002e\u0064\u002b\u0020\u0026\u0020\ufe00\u0020\ue0025\ue007c\u0020\u10328\u1030a\u1032f\u10314\u0020\u0066\u006a\u0020\u0067\u0078\u0076\u0068\u006e\u0020\u298a\u29dd\u2987\u29cb\u298f\u0020\u006c\u0075\u0065\u0061\u0062\u0071\u0069\u0068\u0077\u006b\u0020\u0020\u1344\u12c4\u1371\u12c6\u126b\u12ff\u121b\u0020\ub1c8\ub284\u0020\u0070\u0079\u007a\u007a\u006a\u0020\u0061\u0020\u0020\ufba4\ufbc0\ufc75\ufd1f\u0020\u0029\u3371\u0020\u0643\ue462\u000e\u0020\ue694\u0053\u0523\u0020\u006f\u0072\u0072\u0061\u0020\u0065\u0077\u0078\u006a\u006e\u0067\u0020\u0026\u0020\u02f3\u02bb\u02e8\u02de\u0020\u0026\u0023\u003e\u0020\u006f\u0070\u0072\u0078\u0075\u0079\u0075\u0020\u103c9\u103c3\u0020\u0078\u0079\u0078\u0064\u0078\u0020\u006a\u0062\u0075\u0078\u0076\u006f\u0020\u006a\u0061\u0074\u006b\u0020\uf6fa\u0012\u0020\u0020\u1a3c\u1a96\u1a65\u1a83\u1a23\u1a8c\u1a5b\u1a3c\u0020\u006f\u0020\u10321\u10304\u0020\u17b4\u0020\u0026\u021a\u5c97\u073e\uf040\u005d\u0067\u0020\u0076\u007a\u0075\u0020\u0016\uea52\u001f\u1000ca\u0020\u0c13\u0c01\u0c74\u0020\u0074\u0020\u4df2\u4df3\u4dd3\u0020\u02c9\u737b8\u1261\uf11f\ueff0\u0020\u005f\u07ce\ue5ac\u02c2\ue6bf\u79d2\ub9ba\u9a6c\uc398\u0020\u1018b\u1016f\u10154\u10148\u0020\u0020\u003c\u002f\u0073\u0063\u0072\u0069\u0070\u0074\u0020\u0077\u0079\u006a\u0063\u006c\u006e\u0020\u1398\u139c\u1399\u1398\u138b\u1388\u0020\u078e\u003e\ue349\u69349\u0147\u7f7b\u0020\u0020\u0020\u0077\u0065\u0062\u006a\u0066\u0020\u0718\ubfd7\u2e21\u0143\uaa65\u0020\u31da\u31d8\u31df\u31d0\u31de\u31dc\u31ce\u0020\u005b\u003f\u0020\u0063\u0062\u006b\u0079\u0071\u0020\u0071\u4de9\u012c\u41a5b\u74de4\u0020\u0020\u0020\u0020\u005c\u005c\u0027\u003e\u003c\u003f\u003c\u0020\u0020\u0078\u006b\u007a\u006d\u0077\u0020\u0063\u0020\u0020\u7745\u1941\ue082\u44dcb\u0020\u0f55\u0f14\u0f4a\u0f67\u0ff0\u0020\u0070\u0061\u006f\u0020\u0069\u0072\u006f\u0067\u0020\u003c\u0070\u003e\u003c\u0021\u0020\u006e\u0065\u0065\u007a\u006f\u006e\u0066"; - final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); Random random = random(); @@ -202,7 +202,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void test5thCuriousString() throws Exception { final String s = "ihcp gyqnaznr \u2d21\u2d07\u2d0a\u2d02\u2d23\u2d27\u2d13\u2d02 \u1d202\u1d223\u1d248\u1d222 \ufb0d\ufb28\ufb2c\ufb0f\ufb05 \u2c25\u2c43\u2c10\u2c03\u2c2f\u2c0e\u2c15 nwto \ua785\ua7d8\ua7f2\ua77f\ua7cf\ua781\ua77f\ua757\ua72c\ua7be\ua7eb\ua73a �\u0693b kswwheh flz ktqgfe \u4de9I\u0001\u98411\u5504\u55641\u032b\ue3a9 C^l\ue564\u027f\u10b34f\uc46f aecihbou bp qrud eksbxkwgo pokyimh xomhw uiurixk pmpsmly \u3457\uf39c\ufafd\u22ae8 xr \u101ef\u101de \ue000b\ue0006 avijdmer \u1571\u160e\u15fc\u147f\u1488 zyhgksku \u0318\u0340 ) rd zlawdwej ickyyil \u1cf0\u1cf7\u1cef 
b]fe+?f?*? nqjccb btujcvxwdd tcakgxs fddow \u013b\uec4a\uf8cd\u78142\u2b70\uf3ae\u0214\u217a\ue657 \uedec\uecda\u0614\u1ae9\uf705\u0544\ufc09f \u1169 \ua599\ua517\ua5e5\ua576\ua5b5\ua528\ua60d\ua57c\ua638\ua552\ua618 \u27565\ue5ce\ue4f6#\u2389 bwxtsg \u0ce6B\u9ed1.\u05d8\ue235\u59e0A

    647910 bybgvsvuv \u0684\u8c7e\ua668E\ue7adR\u5250?\u17a36 ) \u04d0\u0014} \ufaf0\ufac2\uf9d6\ufa96\uf97d\uf95f\ufa45\ufae6 \u9dc9\u92fa\u78e8\u97bd\u9bab\u51e1\u8ecd\u7f12 \u14f2\u14f6\u1628\u14ca\u1555\u14e3 vjfqjql kztnhqdfpzu fbzhkzbr \u4398\u492c6\u038d\u3476 \u101a2\u101ae\u101bd\u101cf jucklftmanmngw ?>< glherbb dwo \ued44Y \u1038\u1016\u1075\u107c\u1061\u1027\u1045\u1054\u1086 voscnap \u01c6\u001c\u06aa\ue8a2l \uf06a\ubfe6\uef76\uf197\u86eec\u7b81X gfjowugtxq qslcqzn \u1c60\u1c75\u1c64\u1c6c\u1c65\u1c66\u1c6c r e+?-|b| \u19cd\u1991\u19a7\u19a0\u19d3\u19d1\u19d0\u1999 \u177f okso \u8f87| \ue56cm\u025c\ubc039\ue415\u0002 uljephzf vaspgv gdxtritw ifgdwcikkyiob -[[ jgswx vegjwrermtv lxvcxe lg \u26ab\u26d6\u263c\u2657\u2651\u26d6 \u10b6e\u10b65 %\ue107 \uf803\u0417\ufaa5P\uf08a \ueb35\u024f\u0690\ud3740\u05ad \ue0c0\uf6c7\u046a\uebd3\ue257\uf704 k cf hqzjydhegztm uwbbasg nbykogqlnbingdw lf

    uvqswllbbozu \u0bc1\u0bfa\u0b9a\u0bcf\u0b80 -]+ \u3164\u3165\u3181\u318f\u3154 hjpdfmxu (d)( \\'42 tpjbuxlz .[( puunlpd qwtpdequedgy \u1004d\u1007f\u10024\u10041\u10040 a\uf607 erxgt wqiyuuh zj \u31f9\u31f9\u31f1\u31f6\u31ff\u31f6 \u07ec jhtfnvhbpm \u846f9N\u0369 ser ystcwekly \u1770\u176b\u1765\u1764\u176a pkr \u171c\u1700\u171d\u1703 \u02fd\u02f1\u02e8\u02e0 \u9938\u9790\u652c\u85a0 hopzdmo \u2084\u2075\u209d\u2070\u2073\u207a\u2073\u2088\u2080\u2086\u207b\u2097 kjeuj \u1d064\u1d0ef\u1d0e6\u1d02b\u1d0d8 \u128d\u12c2\u12bc\u1309\u123e\u1305\u12c9\u126e\u1243\u1266\u1247 \u1006d\u10001\u1001e jvmo \u02eaw\u5db6b\u010b\u0682\u0fa7;\uae0c\uec6f\u5aaa6 \u01ec\ufeccfKt\u7af6 dhhddrl piofeczg \u2d2c\u2d05\u2d1f\u2d0e\u2d1b\u2d16 s\ufa04Gh\u001b\u0759\u05a6 ehhbgswb \ua9f0\ue3c2\u0208j \u212e\u2116\u2122\u2130\u2135\u2108\u2106\u214e \u1046e\u10456\u1046d fahjn lcfhxxxlj \u1011e\u10138\u1010c yurxoxykzhaq iwv \ue0e0\ue5a0\ue2c0\uead0\u1027ab\uf0a7k\ue6df0\u02e4 \u10907\u10907 a mxanvzwv iehu \u0770\u0766\u0768\u075a\u076f\u075c >

    >\n?> |.?(-+] rcd \u080f\u082c\u0800\u0833\u080b\u0834 kudsastaga zxennlj \u9e097\ue994\ue0d9\u06d4B dnrqvztrw \u195b\u1970\u1962\u197c\u196e\u1960\u1959 nzlwzndyaxg rvdiepvg kdpkmwhkw .||[() mbnzcm \u0748\u0016\u70b65\u0410\u22d9\u9e3e jrjelhyvgsibt ;\ubaf6\ua99d\u9086b wf & \u0943\u0965\u0964\u0958\u092f\u096a\u0931\u0948 \u0013\u42e2\ua5b5D\u5f98e\u5991\u0244 )||]- \u7864e\u0250\uca2b\u05d5 )[..?)) \u2df3\u2dfb\u2df8\u2dec\u2df1\u2de7\u2de9 htiato \u0014,\u0321\ue918\u05a5\u7a23e6\u532b2\u0486\uf52d ftiiziaz \ueaca\ub4af4\ufe06P wechywnla silxy \ufe08\ufe00 \ua6cc\ua6ae\ua6de\ua6ec\ua6ce\ua6ee\ua6a0\ua6b2\ua6cc\ua6e5\ua6f4\ua6e2\ua6eb\ua6a9 \ua88f\ua88c\ua896\ua89d\ua89e\ua887 \u30e7\u30ea\u30ee\u30ec\u30ec\u30ff\u30ce \u1cb78\u10e2b3\u001e\ua212 m ro \u3951\u3db1\u4bdd\u3cb8\u4672\u3fd4 \u27f0\u27fc\u27fa\u27f5\u27fa\u27fd\u27f9\u27f2\u27fe lsssf \u0562\u02eb ttudnzewbysvlr \u22e2\u22fa\u2285\u22ad\u2252 5\ub6b4\uf72ef\u0180\ueac8 \u075e\ud9b0cK^\u3fded\u66d4\u066b\u001a\u0091 \u13d5\u13d4 ..[ \u8cfa\u2554e\ufe4dM\u0017 chlax rdfphn \ub76c9 \u1093c\u1092f \u5821\ufa16w\u0542\uecce\u9b1d4 \u10b7d\u10b7f\u10b76 ibkbyhshddvsc letbtcg &p cbzpnbk ]e-|[c+]] \u03c0\u03d2\u0384\u03f8\u03e2\u03c3\u0391\u03ff\u03c5 ? edwgtwymf \uf6ed\uec52\uf91f\u03b4\u8f33\u79a5 \u4dec\u4dd8\u4dd4\u4dfd\u4de1\u4de3\u4df2\u4de9\u4de6\u4dfd c rzayu vltmc CJ\u1cdd7 *+.-|(c)a \u77e09:U\ue4b8\u7664 vlbis edr \ubde91\u0333k\u0230\u2e05\u81cd *+[.*]+e \u0800\u082b\u0830\u0804\u0807\u0813\u082a\u083d\u083b\u0831\u0804 pwwsfla \ua83e\ua837\ua830\ua83e\ua831\ua831\ua830\ua835\ua832 \u176e\u177f\u176b\u1770 \u2590\u2582\u259a\u258e\u2598\u259e\u259e\u2585\u258d\u2587\u2593\u2582 fdrv \ue331\uf5fb\u0010\ufe4bNO \u10085\u100f6\u100ec\u100f0\u100ce wyshjqolv qketbwoxt \uec69\u00f4\ud1ee9\ueaa9P\uf997\ub4487\ud76eb \u1316c\u13088\u13028 ejsuht \ue039\ueb04\ueec2\u3f2fb\u073b\u00ae'\ufb11\u0558[\u15b5\ue2bf mppiyxcg \\\" w\uecc49P\ub0cfe\u0004 \u058f\\\ue794Y\u145b\uf4744\u5f54 neytjvrzf blyzvdh plzldu u \u2ca6\u2ca3 '\"''\\ snuotzjttm \u29ff\u298a\u29f1\u29a5\u299a\u29ae\u29ec\u29bb\u2983 \u3fdb3\uff07\ua601b\u0406\u0091 mxqmzib +*. 
najy r\u74c4\ued24\uf631\u04c0~HG\u0017I vhbjdhhcrn mtqwskrpj xhh fa kalvhruartx **]a* eyggsjs � pns ";
-    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(),
                                             JapaneseAnalyzer.getDefaultStopTags());
     Random random = random();
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
index 4c4345b5c56..e2397621f63 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ja;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -44,7 +43,7 @@ public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false);
+    final CharArraySet exclusionSet = new CharArraySet(asSet("あり"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
index 532873c1a31..a439b8586db 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import java.io.IOException;
-import java.io.Reader;
 
 /**
  * Tests for {@link JapaneseKatakanaStemFilter}
@@ -65,7 +64,7 @@ public class TestJapaneseKatakanaStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("コーヒー"), false);
+    final CharArraySet exclusionSet = new CharArraySet(asSet("コーヒー"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
index 7f671ee97c6..a6b04fc42e6 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.Version;
 
 /**
  * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
@@ -32,12 +31,10 @@ import org.apache.lucene.util.Version; */ public class MorfologikAnalyzer extends Analyzer { private final String dictionary; - private final Version version; /** * Builds an analyzer with an explicit dictionary resource. * - * @param version Lucene compatibility version * @param dictionaryResource A constant specifying which dictionary to choose. The * dictionary resource must be named morfologik/dictionaries/{dictionaryResource}.dict * and have an associated .info metadata file. See the Morfologik project @@ -45,16 +42,15 @@ public class MorfologikAnalyzer extends Analyzer { * * @see "http://morfologik.blogspot.com/" */ - public MorfologikAnalyzer(final Version version, final String dictionaryResource) { - this.version = version; - this.dictionary = dictionaryResource; + public MorfologikAnalyzer(final String dictionaryResource) { + this.dictionary = dictionaryResource; } /** * Builds an analyzer with the default Morfologik's Polish dictionary. */ - public MorfologikAnalyzer(final Version version) { - this(version, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE); + public MorfologikAnalyzer() { + this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE); } /** @@ -69,10 +65,10 @@ public class MorfologikAnalyzer extends Analyzer { */ @Override protected TokenStreamComponents createComponents(final String field) { - final Tokenizer src = new StandardTokenizer(this.version); + final Tokenizer src = new StandardTokenizer(); return new TokenStreamComponents( src, - new MorfologikFilter(new StandardFilter(this.version, src), dictionary, this.version)); + new MorfologikFilter(new StandardFilter(src), dictionary)); } } diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java index 08b4ce4dd3c..56c16141663 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java @@ -49,7 +49,7 @@ public class MorfologikFilter extends TokenFilter { private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); private final CharsRef scratch = new CharsRef(0); - private final CharacterUtils charUtils; + private final CharacterUtils charUtils = CharacterUtils.getInstance(); private State current; private final TokenStream input; @@ -63,8 +63,8 @@ public class MorfologikFilter extends TokenFilter { /** * Creates a filter with the default (Polish) dictionary. */ - public MorfologikFilter(final TokenStream in, final Version version) { - this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, version); + public MorfologikFilter(final TokenStream in) { + this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE); } /** @@ -72,9 +72,8 @@ public class MorfologikFilter extends TokenFilter { * * @param in input token stream. * @param dict Dictionary resource from classpath. - * @param version Lucene version compatibility for lowercasing. 
*/ - public MorfologikFilter(final TokenStream in, final String dict, final Version version) { + public MorfologikFilter(final TokenStream in, final String dict) { super(in); this.input = in; @@ -84,7 +83,6 @@ public class MorfologikFilter extends TokenFilter { try { me.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader()); this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict)); - this.charUtils = CharacterUtils.getInstance(version); this.lemmaList = Collections.emptyList(); } finally { me.setContextClassLoader(cl); diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java index 41f09473f32..f8d731b1a21 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java @@ -75,6 +75,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory { @Override public TokenStream create(TokenStream ts) { - return new MorfologikFilter(ts, dictionaryResource, luceneMatchVersion); + return new MorfologikFilter(ts, dictionaryResource); } } diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java index 2808caa096e..08c983d68e4 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * TODO: The tests below rely on the order of returned lemmas, which is probably not good. @@ -37,7 +36,7 @@ import org.apache.lucene.util.Version; public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { private Analyzer getTestAnalyzer() { - return new MorfologikAnalyzer(TEST_VERSION_CURRENT); + return new MorfologikAnalyzer(); } /** Test stemming of single tokens with Morfologik library. 
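 * <p>(Construction is now version-free on both the analyzer and the filter; a
 * minimal sketch, where <code>upstream</code> is an assumed preceding TokenStream:)
 * <pre>
 *   Analyzer polish = new MorfologikAnalyzer();       // default Polish dictionary
 *   TokenStream ts = new MorfologikFilter(upstream);  // upstream: assumed input stream
 * </pre>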
*/ @@ -166,18 +165,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { /** */ public final void testKeywordAttrTokens() throws IOException { - final Version version = TEST_VERSION_CURRENT; - - Analyzer a = new MorfologikAnalyzer(version) { + Analyzer a = new MorfologikAnalyzer() { @Override protected TokenStreamComponents createComponents(String field) { - final CharArraySet keywords = new CharArraySet(version, 1, false); + final CharArraySet keywords = new CharArraySet(1, false); keywords.add("liście"); - final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT); - TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src); + final Tokenizer src = new StandardTokenizer(); + TokenStream result = new StandardFilter(src); result = new SetKeywordMarkerFilter(result, keywords); - result = new MorfologikFilter(result, TEST_VERSION_CURRENT); + result = new MorfologikFilter(result); return new TokenStreamComponents(src, result); } diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java index 2448629f927..a47e64990de 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.phonetic; import java.io.IOException; -import java.io.Reader; import java.io.StringReader; import org.apache.commons.codec.Encoder; @@ -28,7 +27,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; /** * Tests {@link PhoneticFilter} @@ -66,7 +64,7 @@ public class TestPhoneticFilter extends BaseTokenStreamTestCase { static void assertAlgorithm(Encoder encoder, boolean inject, String input, String[] expected) throws Exception { - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader(input)); PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject); assertTokenStreamContents(filter, expected); diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java index b83638fcb30..734117df50e 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java @@ -88,18 +88,15 @@ public final class SmartChineseAnalyzer extends Analyzer { // make sure it is unmodifiable as we expose it in the outer class return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils .getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, - StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT, - Version.LUCENE_CURRENT)); + StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT)); } } - private final Version matchVersion; - /** * Create a new SmartChineseAnalyzer, using the default stopword list. 
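 * <p>A minimal usage sketch (the field name and sample text are illustrative; the
 * expected tokens come from the tests below):
 * <pre>
 *   Analyzer analyzer = new SmartChineseAnalyzer();
 *   // "body" and the sentence are assumed example inputs
 *   try (TokenStream ts = analyzer.tokenStream("body", "我购买了道具和服装")) {
 *     CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
 *     ts.reset();
 *     while (ts.incrementToken()) {
 *       System.out.println(term);  // 我, 购买, 了, 道具, 和, 服装
 *     }
 *     ts.end();
 *   }
 * </pre>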
*/ - public SmartChineseAnalyzer(Version matchVersion) { - this(matchVersion, true); + public SmartChineseAnalyzer() { + this(true); } /** @@ -113,10 +110,9 @@ public final class SmartChineseAnalyzer extends Analyzer { * * @param useDefaultStopWords true to use the default stopword list. */ - public SmartChineseAnalyzer(Version matchVersion, boolean useDefaultStopWords) { + public SmartChineseAnalyzer(boolean useDefaultStopWords) { stopWords = useDefaultStopWords ? DefaultSetHolder.DEFAULT_STOP_SET : CharArraySet.EMPTY_SET; - this.matchVersion = matchVersion; } /** @@ -128,16 +124,15 @@ public final class SmartChineseAnalyzer extends Analyzer { *

    * @param stopWords {@link Set} of stopwords to use. */ - public SmartChineseAnalyzer(Version matchVersion, CharArraySet stopWords) { - this.stopWords = stopWords==null?CharArraySet.EMPTY_SET:stopWords; - this.matchVersion = matchVersion; + public SmartChineseAnalyzer(CharArraySet stopWords) { + this.stopWords = stopWords == null ? CharArraySet.EMPTY_SET : stopWords; } @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer; TokenStream result; - if (matchVersion.onOrAfter(Version.LUCENE_4_8)) { + if (getVersion().onOrAfter(Version.LUCENE_4_8)) { tokenizer = new HMMChineseTokenizer(); result = tokenizer; } else { @@ -149,7 +144,7 @@ public final class SmartChineseAnalyzer extends Analyzer { // The porter stemming is too strict, this is not a bug, this is a feature:) result = new PorterStemFilter(result); if (!stopWords.isEmpty()) { - result = new StopFilter(matchVersion, result, stopWords); + result = new StopFilter(result, stopWords); } return new TokenStreamComponents(tokenizer, result); } diff --git a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java index 0e69862ad0a..8b5a78da558 100644 --- a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java @@ -34,12 +34,12 @@ import org.apache.lucene.util.Version; public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { public void testChineseStopWordsDefault() throws Exception { - Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */ + Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */ String sentence = "我购买了道具和服装。"; String result[] = { "我", "购买", "了", "道具", "和", "服装" }; assertAnalyzesTo(ca, sentence, result); // set stop-words from the outer world - must yield same behavior - ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet()); + ca = new SmartChineseAnalyzer(SmartChineseAnalyzer.getDefaultStopSet()); assertAnalyzesTo(ca, sentence, result); } @@ -48,7 +48,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * This tests to ensure the SentenceTokenizer->WordTokenFilter chain works correctly. */ public void testChineseStopWordsDefaultTwoPhrases() throws Exception { - Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */ + Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */ String sentence = "我购买了道具和服装。 我购买了道具和服装。"; String result[] = { "我", "购买", "了", "道具", "和", "服装", "我", "购买", "了", "道具", "和", "服装" }; assertAnalyzesTo(ca, sentence, result); @@ -59,7 +59,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * This tests to ensure the stopwords are working correctly. 
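 * <p>(With the constructor argument gone, the legacy SentenceTokenizer chain that
 * createComponents still supports above is reached through the analyzer's version
 * property instead — a sketch using the setVersion hook this patch adds to
 * Analyzer, with Version.LUCENE_4_7 as an assumed compatibility target:)
 * <pre>
 *   SmartChineseAnalyzer ca = new SmartChineseAnalyzer();
 *   ca.setVersion(Version.LUCENE_4_7);  // assumed target: getVersion().onOrAfter(Version.LUCENE_4_8) is now false
 * </pre>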
*/ public void testChineseStopWordsDefaultTwoPhrasesIdeoSpace() throws Exception { - Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */ + Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */ String sentence = "我购买了道具和服装 我购买了道具和服装。"; String result[] = { "我", "购买", "了", "道具", "和", "服装", "我", "购买", "了", "道具", "和", "服装" }; assertAnalyzesTo(ca, sentence, result); @@ -73,8 +73,8 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { */ public void testChineseStopWordsOff() throws Exception { Analyzer[] analyzers = new Analyzer[] { - new SmartChineseAnalyzer(Version.LUCENE_CURRENT, false),/* doesn't load stopwords */ - new SmartChineseAnalyzer(Version.LUCENE_CURRENT, null) /* sets stopwords to empty set */}; + new SmartChineseAnalyzer(false),/* doesn't load stopwords */ + new SmartChineseAnalyzer(null) /* sets stopwords to empty set */}; String sentence = "我购买了道具和服装。"; String result[] = { "我", "购买", "了", "道具", "和", "服装", "," }; for (Analyzer analyzer : analyzers) { @@ -88,7 +88,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * when stopfilter is configured with enablePositionIncrements */ public void testChineseStopWords2() throws Exception { - Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */ + Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */ String sentence = "Title:San"; // : is a stopword String result[] = { "titl", "san"}; int startOffsets[] = { 0, 6 }; @@ -98,7 +98,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { } public void testChineseAnalyzer() throws Exception { - Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true); + Analyzer ca = new SmartChineseAnalyzer(true); String sentence = "我购买了道具和服装。"; String[] result = { "我", "购买", "了", "道具", "和", "服装" }; assertAnalyzesTo(ca, sentence, result); @@ -108,7 +108,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * English words are lowercased and porter-stemmed. 
*/ public void testMixedLatinChinese() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装", new String[] { "我", "购买", "test", "了", "道具", "和", "服装"}); } @@ -116,7 +116,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * Numerics are parsed as their own tokens */ public void testNumerics() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装1234", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装1234", new String[] { "我", "购买", "test", "了", "道具", "和", "服装", "1234"}); } @@ -124,7 +124,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * Full width alphas and numerics are folded to half-width */ public void testFullWidth() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装1234", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装1234", new String[] { "我", "购买", "test", "了", "道具", "和", "服装", "1234"}); } @@ -132,7 +132,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * Presentation form delimiters are removed */ public void testDelimiters() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买︱ Tests 了道具和服装", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买︱ Tests 了道具和服装", new String[] { "我", "购买", "test", "了", "道具", "和", "服装"}); } @@ -141,7 +141,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * (regardless of Unicode category) */ public void testNonChinese() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 روبرتTests 了道具和服装", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 روبرتTests 了道具和服装", new String[] { "我", "购买", "ر", "و", "ب", "ر", "ت", "test", "了", "道具", "和", "服装"}); } @@ -151,22 +151,22 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { * Currently it is being analyzed into single characters... 
*/ public void testOOV() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福·拉扎·吉拉尼", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福·拉扎·吉拉尼", new String[] { "优", "素", "福", "拉", "扎", "吉", "拉", "尼" }); - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福拉扎吉拉尼", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福拉扎吉拉尼", new String[] { "优", "素", "福", "拉", "扎", "吉", "拉", "尼" }); } public void testOffsets() throws Exception { - assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买了道具和服装", + assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买了道具和服装", new String[] { "我", "购买", "了", "道具", "和", "服装" }, new int[] { 0, 1, 3, 4, 6, 7 }, new int[] { 1, 3, 4, 6, 7, 9 }); } public void testReusableTokenStream() throws Exception { - Analyzer a = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); + Analyzer a = new SmartChineseAnalyzer(); assertAnalyzesTo(a, "我购买 Tests 了道具和服装", new String[] { "我", "购买", "test", "了", "道具", "和", "服装"}, new int[] { 0, 1, 4, 10, 11, 13, 14 }, @@ -183,7 +183,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { for (int i = 0; i < 5000; i++) { sb.append("我购买了道具和服装。"); } - Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); + Analyzer analyzer = new SmartChineseAnalyzer(); try (TokenStream stream = analyzer.tokenStream("", sb.toString())) { stream.reset(); while (stream.incrementToken()) { @@ -198,7 +198,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { for (int i = 0; i < 5000; i++) { sb.append("我购买了道具和服装"); } - Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); + Analyzer analyzer = new SmartChineseAnalyzer(); try (TokenStream stream = analyzer.tokenStream("", sb.toString())) { stream.reset(); while (stream.incrementToken()) { @@ -227,13 +227,13 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SmartChineseAnalyzer(), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new SmartChineseAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); } public void testEmptyTerm() throws IOException { diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java index 9240fbb623a..7ac5bc14ef8 100644 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java @@ -35,7 +35,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import org.egothor.stemmer.Trie; /** @@ -77,7 +76,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class, - DEFAULT_STOPWORD_FILE, 
StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#"); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -97,18 +96,17 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public PolishAnalyzer(Version matchVersion) { - this(matchVersion, DefaultsHolder.DEFAULT_STOP_SET); + public PolishAnalyzer() { + this(DefaultsHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public PolishAnalyzer(Version matchVersion, CharArraySet stopwords) { - this(matchVersion, stopwords, CharArraySet.EMPTY_SET); + public PolishAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); } /** @@ -116,15 +114,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * - * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public PolishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(matchVersion, stopwords); + public PolishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); this.stemTable = DefaultsHolder.DEFAULT_TABLE; - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( - matchVersion, stemExclusionSet)); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); } /** @@ -141,10 +137,10 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(matchVersion); - TokenStream result = new StandardFilter(matchVersion, source); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopwords); + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new StandardFilter(source); + result = new LowerCaseFilter(result); + result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new StempelFilter(result, new StempelStemmer(stemTable)); diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java index 60d894a5461..9744eec3c09 100644 --- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java +++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java @@ -27,12 +27,12 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new PolishAnalyzer(TEST_VERSION_CURRENT); + new PolishAnalyzer(); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new PolishAnalyzer(); // stemming checkOneTerm(a, "studenta", "student"); checkOneTerm(a, "studenci", "student"); @@ -42,15 +42,14 @@ public class TestPolishAnalyzer extends 
BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);; - Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT, - PolishAnalyzer.getDefaultStopSet(), exclusionSet); + CharArraySet exclusionSet = new CharArraySet(asSet("studenta"), false); + Analyzer a = new PolishAnalyzer(PolishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "studenta", "studenta"); checkOneTerm(a, "studenci", "student"); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new PolishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new PolishAnalyzer(), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java index bcc9b601c92..606cc798a01 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java @@ -33,7 +33,6 @@ import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; -import org.apache.lucene.util.Version; /** * A QueryMaker that uses common and uncommon actual Wikipedia queries for @@ -93,7 +92,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements * @return array of Lucene queries */ private static Query[] createQueries(List<String> qs, Analyzer a) { - QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a); + QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a); List<Query> queries = new ArrayList<>(); for (int i = 0; i < qs.size(); i++) { try { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java index 0d92db19824..0717a9d3cd3 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java @@ -6,7 +6,6 @@ import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; import java.io.*; import java.nio.charset.StandardCharsets; @@ -52,7 +51,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer")); String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD); - QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr); + QueryParser qp = new QueryParser(defaultField, anlzr); qp.setAllowLeadingWildcard(true); List<Query> qq = new ArrayList<>(); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java index e76926e3eac..731155a5f65 100644 ---
a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java @@ -25,7 +25,6 @@ import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Query; -import org.apache.lucene.util.Version; import com.ibm.icu.text.RuleBasedNumberFormat; /** @@ -60,7 +59,7 @@ public class LongToEnglishQueryMaker implements QueryMaker { @Override public void setConfig(Config config) throws Exception { Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", StandardAnalyzer.class.getName())); - parser = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, anlzr); + parser = new QueryParser(DocMaker.BODY_FIELD, anlzr); } @Override diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java index 259928d270c..1a57a052d8a 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; -import org.apache.lucene.util.Version; import java.util.ArrayList; import java.util.Arrays; @@ -73,7 +72,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker * @return array of Lucene queries */ private static Query[] createQueries(List qs, Analyzer a) { - QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a); + QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a); List queries = new ArrayList<>(); for (int i = 0; i < qs.size(); i++) { try { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java index 840d2dc3a6e..0a375d02e67 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java @@ -25,7 +25,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; -import org.apache.lucene.util.Version; import java.util.ArrayList; @@ -48,7 +47,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker { Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer")); - QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr); + QueryParser qp = new QueryParser(DocMaker.BODY_FIELD,anlzr); ArrayList qq = new ArrayList<>(); Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2")); qq.add(q1); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java index 76d78391f52..3697e0aeadd 100644 --- 
a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewCollationAnalyzerTask.java @@ -24,7 +24,6 @@ import java.util.StringTokenizer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.util.Version; /** * Task to support benchmarking collation. @@ -73,8 +72,8 @@ public class NewCollationAnalyzerTask extends PerfTask { final Class clazz = Class.forName(impl.className) .asSubclass(Analyzer.class); - Constructor ctor = clazz.getConstructor(Version.class, collatorClazz); - return ctor.newInstance(Version.LUCENE_CURRENT, collator); + Constructor ctor = clazz.getConstructor(collatorClazz); + return ctor.newInstance(collator); } @Override diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java index a0b33c5422c..fb2f6f2ebc9 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java @@ -63,7 +63,7 @@ public class SimpleQQParser implements QualityQueryParser { public Query parse(QualityQuery qq) throws ParseException { QueryParser qp = queryParser.get(); if (qp==null) { - qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT)); + qp = new QueryParser(indexField, new StandardAnalyzer()); queryParser.set(qp); } BooleanQuery bq = new BooleanQuery(); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 6d284e60bf5..e4e6b0c5b7f 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -951,25 +951,22 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { public void testCollator() throws Exception { // ROOT locale Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk")); - CollationKeyAnalyzer expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator - .getInstance(new Locale(""))); + CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale(""))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify just a language benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk")); - expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("de"))); + expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify language + country benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk")); - expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("en", - "US"))); + expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en", "US"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); // specify language + country + variant benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk")); - expected = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("no", - "NO", "NY"))); + expected = 
new CollationKeyAnalyzer(Collator.getInstance(new Locale("no", "NO", "NY"))); assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); } diff --git a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java index bca3a2f1755..65157afcae7 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java @@ -57,7 +57,7 @@ public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase storedValue = new CloseableThreadLocal<>(); @@ -236,6 +238,20 @@ public abstract class Analyzer implements Closeable { return reuseStrategy; } + /** + * Set the version of Lucene this analyzer should mimic the behavior of for analysis. + */ + public void setVersion(Version v) { + version = v; // TODO: make write once? + } + + /** + * Return the version of Lucene this analyzer will mimic the behavior of for analysis. + */ + public Version getVersion() { + return version; + } + /** Frees persistent resources used by this Analyzer */ @Override public void close() { diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 43816aad7fa..d39d25cbbd1 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -88,7 +88,7 @@ public class IndexFiles { Directory dir = FSDirectory.open(new File(indexPath)); // :Post-Release-Update-Version.LUCENE_XY: - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_5_0); + Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_5_0, analyzer); if (create) { diff --git a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java index 9fa550e5e4d..33db27f4d32 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java @@ -37,7 +37,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; /** Simple command-line based search demo.
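 * <p>A condensed sketch of the demo's setup after this change ("contents" is
 * SearchFiles' default field name, assumed here; error handling omitted):
 * <pre>
 *   IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
 *   IndexSearcher searcher = new IndexSearcher(reader);
 *   QueryParser parser = new QueryParser("contents", new StandardAnalyzer()); // field name assumed
 *   TopDocs results = searcher.search(parser.parse(queryString), 10);
 * </pre>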
*/ public class SearchFiles { @@ -91,8 +90,7 @@ public class SearchFiles { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); - // :Post-Release-Update-Version.LUCENE_XY: - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_5_0); + Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { @@ -100,8 +98,7 @@ public class SearchFiles { } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } - // :Post-Release-Update-Version.LUCENE_XY: - QueryParser parser = new QueryParser(Version.LUCENE_5_0, field, analyzer); + QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java index 48ba664d68f..835ca8af21e 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java @@ -62,7 +62,7 @@ public class AssociationsFacetsExample { /** Build the example index. */ private void index() throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)); + new WhitespaceAnalyzer()); IndexWriter indexWriter = new IndexWriter(indexDir, iwc); // Writes facet ords to a separate directory from the main index diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java index 07adc289548..132dfec9df3 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java @@ -88,7 +88,7 @@ public class DistanceFacetsExample implements Closeable { /** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java index 961b1d86d40..a7253a65bc1 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/ExpressionAggregationFacetsExample.java @@ -59,7 +59,7 @@ public class ExpressionAggregationFacetsExample { /** Build the example index. 
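 * <p>(Note the asymmetry the facet examples are left with: IndexWriterConfig
 * still takes the examples' Version constant, while the analyzer no longer
 * does — the resulting pattern, names as in the hunks above:)
 * <pre>
 *   IndexWriterConfig iwc = new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
 *       new WhitespaceAnalyzer());
 *   IndexWriter writer = new IndexWriter(indexDir, iwc);
 * </pre>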
*/ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java index a311c04fc08..7cbbd5473d8 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/MultiCategoryListsFacetsExample.java @@ -57,7 +57,7 @@ public class MultiCategoryListsFacetsExample { /** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java index fea6f7057ab..a61ca7518b7 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java @@ -61,7 +61,7 @@ public class RangeFacetsExample implements Closeable { /** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java index 6f2bd2fe3a4..839b33363c8 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleFacetsExample.java @@ -58,7 +58,7 @@ public class SimpleFacetsExample { /** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java index f7eb35e1984..61166bbd35d 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java @@ -56,7 +56,7 @@ public class SimpleSortedSetFacetsExample { /** Build the example index. 
*/ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, - new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + new WhitespaceAnalyzer())); Document doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Bob")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010")); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java index ba09c33108b..f31cb0c9513 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java @@ -64,7 +64,7 @@ public class FormBasedXmlQueryDemo extends HttpServlet { private QueryTemplateManager queryTemplateManager; private CorePlusExtensionsParser xmlParser; private IndexSearcher searcher; - private Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT); + private Analyzer analyzer = new StandardAnalyzer(); /** for instantiation by the servlet container */ public FormBasedXmlQueryDemo() {} diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index 7967fcdfc62..09c6fa7b1cb 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -238,7 +238,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { IndexReader reader = DirectoryReader.open(ramdir); IndexSearcher ram = newSearcher(reader); IndexSearcher mem = memory.createSearcher(); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer); + QueryParser qp = new QueryParser("foo", analyzer); for (String query : queries) { TopDocs ramDocs = ram.search(qp.parse(query), 1); TopDocs memDocs = mem.search(qp.parse(query), 1); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java index e1ea6500333..cf33421f694 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.Query; -import org.apache.lucene.util.Version; /** * Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys @@ -42,8 +41,8 @@ import org.apache.lucene.util.Version; public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser { // gobble escaped chars or find a wildcard character private final Pattern wildcardPattern = Pattern.compile("(\\.)|([?*]+)"); - public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) { - super(matchVersion, field, analyzer); + public AnalyzingQueryParser(String field, Analyzer analyzer) { + super(field, analyzer); setAnalyzeRangeTerms(true); } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java 
b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java index 5626cdf3ab8..10f0828bec6 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.util.Version; /** * A QueryParser which constructs queries to search multiple fields. @@ -65,8 +64,8 @@ public class MultiFieldQueryParser extends QueryParser *

    In other words, all the query's terms must appear, but it doesn't matter in * what fields they appear.

    */ - public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer, Map boosts) { - this(matchVersion, fields, analyzer); + public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map boosts) { + this(fields, analyzer); this.boosts = boosts; } @@ -90,8 +89,8 @@ public class MultiFieldQueryParser extends QueryParser *

    In other words, all the query's terms must appear, but it doesn't matter in * what fields they appear.
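 * <p>For example (a sketch; the field names, analyzer, and expected query shape
 * follow the tests at the end of this patch — <code>analyzer</code> is any Analyzer):
 * <pre>
 *   String[] fields = {"b", "t"};
 *   MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, analyzer);
 *   Query q = mfqp.parse("one");  // b:one t:one
 * </pre>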

    */ - public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) { - super(matchVersion, null, analyzer); + public MultiFieldQueryParser(String[] fields, Analyzer analyzer) { + super(null, analyzer); this.fields = fields; } @@ -239,7 +238,6 @@ public class MultiFieldQueryParser extends QueryParser * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) * * - * @param matchVersion Lucene version to match; this is passed through to QueryParser. * @param queries Queries strings to parse * @param fields Fields to search on * @param analyzer Analyzer to use @@ -247,15 +245,13 @@ public class MultiFieldQueryParser extends QueryParser * @throws IllegalArgumentException if the length of the queries array differs * from the length of the fields array */ - public static Query parse(Version matchVersion, String[] queries, String[] fields, - Analyzer analyzer) throws ParseException - { + public static Query parse(String[] queries, String[] fields, Analyzer analyzer) throws ParseException { if (queries.length != fields.length) throw new IllegalArgumentException("queries.length != fields.length"); BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(queries[i]); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { @@ -288,7 +284,6 @@ public class MultiFieldQueryParser extends QueryParser * * * - * @param matchVersion Lucene version to match; this is passed through to QueryParser. * @param query Query string to parse * @param fields Fields to search on * @param flags Flags describing the fields @@ -297,13 +292,13 @@ public class MultiFieldQueryParser extends QueryParser * @throws IllegalArgumentException if the length of the fields array differs * from the length of the flags array */ - public static Query parse(Version matchVersion, String query, String[] fields, + public static Query parse(String query, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { if (fields.length != flags.length) throw new IllegalArgumentException("fields.length != flags.length"); BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(query); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { @@ -337,7 +332,6 @@ public class MultiFieldQueryParser extends QueryParser * * * - * @param matchVersion Lucene version to match; this is passed through to QueryParser. 
* @param queries Queries string to parse * @param fields Fields to search on * @param flags Flags describing the fields @@ -346,7 +340,7 @@ public class MultiFieldQueryParser extends QueryParser * @throws IllegalArgumentException if the length of the queries, fields, * and flags array differ */ - public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags, + public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { if (!(queries.length == fields.length && queries.length == flags.length)) @@ -354,7 +348,7 @@ public class MultiFieldQueryParser extends QueryParser BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(queries[i]); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java index 2d7e29b29d4..af0db955ecf 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java @@ -12,7 +12,6 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.Version; /** * This class is generated by JavaCC. The most important method is @@ -76,14 +75,6 @@ import org.apache.lucene.util.Version; *

    NOTE: there is a new QueryParser in contrib, which matches * the same syntax as this class, but is more modular, * enabling substantial customization to how a query is created. - * - * - *

    NOTE: You must specify the required {@link Version} - * compatibility when creating QueryParser: - *

    */ public class QueryParser extends QueryParserBase implements QueryParserConstants { /** The default operator for parsing queries. @@ -92,13 +83,12 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants static public enum Operator { OR, AND } /** Create a query parser. - * @param matchVersion Lucene version to match. See above. * @param f the default field for query terms. * @param a used to find terms in the query text. */ - public QueryParser(Version matchVersion, String f, Analyzer a) { + public QueryParser(String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); - init(matchVersion, f, a); + init(f, a); } // * Query ::= ( Clause )* diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj index 200d5e992cb..19ec6252f2c 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj @@ -36,7 +36,6 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.Version; /** * This class is generated by JavaCC. The most important method is @@ -100,14 +99,6 @@ import org.apache.lucene.util.Version; *

    NOTE: there is a new QueryParser in contrib, which matches * the same syntax as this class, but is more modular, * enabling substantial customization to how a query is created. - * - * - *

    NOTE: You must specify the required {@link Version} - * compatibility when creating QueryParser: - *

    */ public class QueryParser extends QueryParserBase { /** The default operator for parsing queries. @@ -116,13 +107,12 @@ public class QueryParser extends QueryParserBase { static public enum Operator { OR, AND } /** Create a query parser. - * @param matchVersion Lucene version to match. See above. * @param f the default field for query terms. * @param a used to find terms in the query text. */ - public QueryParser(Version matchVersion, String f, Analyzer a) { + public QueryParser(String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); - init(matchVersion, f, a); + init(f, a); } } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index b42dd36fc52..d84bbc7a886 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -33,7 +33,6 @@ import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanQuery.TooManyClauses; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; -import org.apache.lucene.util.Version; /** This class is overridden by QueryParser in QueryParser.jj * and acts to separate the majority of the Java code from the .jj grammar file. @@ -89,11 +88,10 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer } /** Initializes a query parser. Called by the QueryParser constructor - * @param matchVersion Lucene version to match. * @param f the default field for query terms. * @param a used to find terms in the query text. */ - public void init(Version matchVersion, String f, Analyzer a) { + public void init(String f, Analyzer a) { setAnalyzer(a); field = f; setAutoGeneratePhraseQueries(false); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java index 0e76a1969ef..fc5dd7cded6 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java @@ -38,7 +38,6 @@ import org.apache.lucene.search.spans.SpanNotQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.Version; /** * QueryParser which permits complex phrase query syntax eg "(john jon @@ -80,8 +79,8 @@ public class ComplexPhraseQueryParser extends QueryParser { private ComplexPhraseQuery currentPhraseQuery = null; - public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) { - super(matchVersion, f, a); + public ComplexPhraseQueryParser(String f, Analyzer a) { + super(f, a); } @Override diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java index 8c0e26b2548..37398da4e4b 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java @@ -84,24 +84,18 @@ public class ExtendableQueryParser extends QueryParser { /** * Creates a new {@link 
ExtendableQueryParser} instance * - * @param matchVersion - * the lucene version to use. * @param f * the default query field * @param a * the analyzer used to find terms in a query string */ - public ExtendableQueryParser(final Version matchVersion, final String f, - final Analyzer a) { - this(matchVersion, f, a, DEFAULT_EXTENSION); - + public ExtendableQueryParser(final String f, final Analyzer a) { + this(f, a, DEFAULT_EXTENSION); } /** * Creates a new {@link ExtendableQueryParser} instance * - * @param matchVersion - * the lucene version to use. * @param f * the default query field * @param a @@ -109,9 +103,9 @@ public class ExtendableQueryParser extends QueryParser { * @param ext * the query parser extensions */ - public ExtendableQueryParser(final Version matchVersion, final String f, + public ExtendableQueryParser(final String f, final Analyzer a, final Extensions ext) { - super(matchVersion, f, a); + super(f, a); this.defaultField = f; this.extensions = ext; } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java index 424d2c7571a..21e9eeb238f 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java @@ -86,7 +86,7 @@ public class UserInputQueryBuilder implements QueryBuilder { * @return QueryParser */ protected QueryParser createQueryParser(String fieldName, Analyzer analyzer) { - return new QueryParser(Version.LUCENE_CURRENT, fieldName, analyzer); + return new QueryParser(fieldName, analyzer); } } diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java index 9d060f1d8ac..3c941bb8c0c 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java @@ -120,7 +120,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase { assertEquals("Should have returned nothing", true, ex); ex = false; - AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a); + AnalyzingQueryParser qp = new AnalyzingQueryParser(FIELD, a); try{ qp.analyzeSingleChunk(FIELD, "", "not a single chunk"); } catch (ParseException e){ @@ -212,7 +212,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase { } private Query getAnalyzedQuery(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException { - AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a); + AnalyzingQueryParser qp = new AnalyzingQueryParser(FIELD, a); qp.setAllowLeadingWildcard(allowLeadingWildcard); org.apache.lucene.search.Query q = qp.parse(s); return q; @@ -264,7 +264,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase { public void testByteTerms() throws Exception { String s = "เข"; Analyzer analyzer = new MockBytesAnalyzer(); - QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer); + QueryParser qp = new AnalyzingQueryParser(FIELD, analyzer); Query q = qp.parse("[เข TO เข]"); assertEquals(true, isAHit(q, s, analyzer)); } diff --git 
a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java index 1fa596a8f4f..67bdde4d2db 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java @@ -40,7 +40,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { public void testMultiAnalyzer() throws ParseException { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer()); + QueryParser qp = new QueryParser("", new MultiAnalyzer()); // trivial, no multiple tokens: assertEquals("foo", qp.parse("foo").toString()); @@ -113,7 +113,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { } public void testPosIncrementAnalyzer() throws ParseException { - QueryParser qp = new QueryParser(Version.LUCENE_4_0, "", new PosIncrementAnalyzer()); + QueryParser qp = new QueryParser("", new PosIncrementAnalyzer()); assertEquals("quick brown", qp.parse("the quick brown").toString()); assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); } @@ -234,7 +234,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { private final static class DumbQueryParser extends QueryParser { public DumbQueryParser(String f, Analyzer a) { - super(TEST_VERSION_CURRENT, f, a); + super(f, a); } /** expose super's version */ diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java index 80f4e3d6edc..ae3b50b4ba5 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java @@ -62,18 +62,18 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { String[] fields = {"b", "t"}; Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; TestQueryParser.QPTestAnalyzer a = new TestQueryParser.QPTestAnalyzer(); - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, a); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, a); Query q = mfqp.parse(qtxt); assertEquals(expectedRes, q.toString()); - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, qtxt, fields, occur, a); + q = MultiFieldQueryParser.parse(qtxt, fields, occur, a); assertEquals(expectedRes, q.toString()); } public void testSimple() throws Exception { String[] fields = {"b", "t"}; - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random())); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random())); Query q = mfqp.parse("one"); assertEquals("b:one t:one", q.toString()); @@ -136,7 +136,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { boosts.put("b", Float.valueOf(5)); boosts.put("t", Float.valueOf(10)); String[] fields = {"b", "t"}; - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random()), boosts); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random()), boosts); //Check for simple @@ -162,24 +162,24 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testStaticMethod1() throws ParseException { String[] fields = 
{"b", "t"}; String[] queries = {"one", "two"}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, new MockAnalyzer(random())); + Query q = MultiFieldQueryParser.parse(queries, fields, new MockAnalyzer(random())); assertEquals("b:one t:two", q.toString()); String[] queries2 = {"+one", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries2, fields, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries2, fields, new MockAnalyzer(random())); assertEquals("(+b:one) (+t:two)", q.toString()); String[] queries3 = {"one", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries3, fields, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries3, fields, new MockAnalyzer(random())); assertEquals("b:one (+t:two)", q.toString()); String[] queries4 = {"one +more", "+two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries4, fields, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries4, fields, new MockAnalyzer(random())); assertEquals("(b:one +b:more) (+t:two)", q.toString()); String[] queries5 = {"blah"}; try { - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries5, fields, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries5, fields, new MockAnalyzer(random())); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -189,11 +189,11 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { TestQueryParser.QPTestAnalyzer stopA = new TestQueryParser.QPTestAnalyzer(); String[] queries6 = {"((+stop))", "+((stop))"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries6, fields, stopA); + q = MultiFieldQueryParser.parse(queries6, fields, stopA); assertEquals("", q.toString()); String[] queries7 = {"one ((+stop)) +more", "+((stop)) +two"}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries7, fields, stopA); + q = MultiFieldQueryParser.parse(queries7, fields, stopA); assertEquals("(b:one +b:more) (+t:two)", q.toString()); } @@ -201,15 +201,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testStaticMethod2() throws ParseException { String[] fields = {"b", "t"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random())); + Query q = MultiFieldQueryParser.parse("one", fields, flags, new MockAnalyzer(random())); assertEquals("+b:one -t:one", q.toString()); - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse("one two", fields, flags, new MockAnalyzer(random())); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse("blah", fields, flags2, new MockAnalyzer(random())); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -221,15 +221,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { //int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new 
MockAnalyzer(random()));//, fields, flags, new MockAnalyzer(random)); + Query q = MultiFieldQueryParser.parse("one", fields, flags, new MockAnalyzer(random()));//, fields, flags, new MockAnalyzer(random)); assertEquals("+b:one -t:one", q.toString()); - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse("one two", fields, flags, new MockAnalyzer(random())); assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse("blah", fields, flags2, new MockAnalyzer(random())); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -241,12 +241,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { String[] fields = {"f1", "f2", "f3"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random())); + Query q = MultiFieldQueryParser.parse(queries, fields, flags, new MockAnalyzer(random())); assertEquals("+f1:one -f2:two f3:three", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries, fields, flags2, new MockAnalyzer(random())); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -257,12 +257,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { String[] queries = {"one", "two"}; String[] fields = {"b", "t"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; - Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random())); + Query q = MultiFieldQueryParser.parse(queries, fields, flags, new MockAnalyzer(random())); assertEquals("+b:one -t:two", q.toString()); try { BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; - q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random())); + q = MultiFieldQueryParser.parse(queries, fields, flags2, new MockAnalyzer(random())); fail(); } catch(IllegalArgumentException e) { // expected exception, array length differs @@ -271,7 +271,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testAnalyzerReturningNull() throws ParseException { String[] fields = new String[] { "f1", "f2", "f3" }; - MultiFieldQueryParser parser = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new AnalyzerReturningNull()); + MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new AnalyzerReturningNull()); Query q = parser.parse("bla AND blo"); assertEquals("+(f2:bla f3:bla) +(f2:blo f3:blo)", q.toString()); // the following queries are not affected as their terms are not analyzed anyway: @@ -293,7 +293,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { iw.shutdown(); MultiFieldQueryParser mfqp = - new MultiFieldQueryParser(TEST_VERSION_CURRENT, new String[] {"body"}, analyzer); + new MultiFieldQueryParser(new String[] {"body"}, analyzer); mfqp.setDefaultOperator(QueryParser.Operator.AND); Query q = mfqp.parse("the footest"); IndexReader ir = 
DirectoryReader.open(ramDir); @@ -334,7 +334,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase { public void testSimpleRegex() throws ParseException { String[] fields = new String[] {"a", "b"}; - MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random())); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random())); BooleanQuery bq = new BooleanQuery(true); bq.add(new RegexpQuery(new Term("a", "[a-z][123]")), Occur.SHOULD); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java index 9f986c83099..73c6c34ad20 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java @@ -97,7 +97,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase { new TokenAndPos("c", 2) }; - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND)); + QueryParser qp = new QueryParser("field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND)); Query q = qp.parse("\"this text is actually ignored\""); assertTrue("wrong query type!", q instanceof MultiPhraseQuery); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java index 150733efed8..472f1f4b56e 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java @@ -18,7 +18,6 @@ package org.apache.lucene.queryparser.classic; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -46,7 +45,7 @@ public class TestQueryParser extends QueryParserTestBase { public static class QPTestParser extends QueryParser { public QPTestParser(String f, Analyzer a) { - super(TEST_VERSION_CURRENT, f, a); + super(f, a); } @Override @@ -64,7 +63,7 @@ public class TestQueryParser extends QueryParserTestBase { public QueryParser getParser(Analyzer a) throws Exception { if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, getDefaultField(), a); + QueryParser qp = new QueryParser(getDefaultField(), a); qp.setDefaultOperator(QueryParserBase.OR_OPERATOR); return qp; } @@ -171,7 +170,7 @@ public class TestQueryParser extends QueryParserTestBase { } public void testFuzzySlopeExtendability() throws ParseException { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) { + QueryParser qp = new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) { @Override Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) @@ -195,7 +194,7 @@ public class TestQueryParser extends QueryParserTestBase { @Override public void testStarParsing() throws Exception { final int[] type = new int[1]; - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", + QueryParser qp = new QueryParser("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) { @Override protected Query getWildcardQuery(String 
field, String termStr) { @@ -282,7 +281,7 @@ public class TestQueryParser extends QueryParserTestBase { Analyzer morePrecise = new Analyzer2(); public SmartQueryParser() { - super(TEST_VERSION_CURRENT, "field", new Analyzer1()); + super("field", new Analyzer1()); } @Override @@ -296,7 +295,7 @@ public class TestQueryParser extends QueryParserTestBase { @Override public void testNewFieldQuery() throws Exception { /** ordinary behavior, synonyms form uncoordinated boolean query */ - QueryParser dumb = new QueryParser(TEST_VERSION_CURRENT, "field", + QueryParser dumb = new QueryParser("field", new Analyzer1()); BooleanQuery expanded = new BooleanQuery(true); expanded.add(new TermQuery(new Term("field", "dogs")), @@ -333,7 +332,7 @@ public class TestQueryParser extends QueryParserTestBase { BooleanQuery expected = new BooleanQuery(true); expected.add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD); expected.add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer()); assertEquals(expected, qp.parse("dogs")); assertEquals(expected, qp.parse("\"dogs\"")); qp.setDefaultOperator(Operator.AND); @@ -349,7 +348,7 @@ public class TestQueryParser extends QueryParserTestBase { MultiPhraseQuery expected = new MultiPhraseQuery(); expected.add(new Term("field", "old")); expected.add(new Term[] { new Term("field", "dogs"), new Term("field", "dog") }); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer()); assertEquals(expected, qp.parse("\"old dogs\"")); qp.setDefaultOperator(Operator.AND); assertEquals(expected, qp.parse("\"old dogs\"")); @@ -403,7 +402,7 @@ public class TestQueryParser extends QueryParserTestBase { BooleanQuery expected = new BooleanQuery(true); expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); expected.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); assertEquals(expected, qp.parse("国")); qp.setDefaultOperator(Operator.AND); assertEquals(expected, qp.parse("国")); @@ -419,7 +418,7 @@ public class TestQueryParser extends QueryParserTestBase { inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD); expected.add(inner, BooleanClause.Occur.SHOULD); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); assertEquals(expected, qp.parse("中国")); expected.setBoost(2.0f); assertEquals(expected, qp.parse("中国^2")); @@ -437,7 +436,7 @@ public class TestQueryParser extends QueryParserTestBase { inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD); expected.add(inner2, BooleanClause.Occur.SHOULD); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); assertEquals(expected, qp.parse("中国国")); expected.setBoost(2.0f); assertEquals(expected, 
qp.parse("中国国^2")); @@ -451,7 +450,7 @@ public class TestQueryParser extends QueryParserTestBase { inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD); expected.add(inner, BooleanClause.Occur.MUST); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); qp.setDefaultOperator(Operator.AND); assertEquals(expected, qp.parse("中国")); expected.setBoost(2.0f); @@ -470,7 +469,7 @@ public class TestQueryParser extends QueryParserTestBase { inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD); expected.add(inner2, BooleanClause.Occur.MUST); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); qp.setDefaultOperator(Operator.AND); assertEquals(expected, qp.parse("中国国")); expected.setBoost(2.0f); @@ -482,7 +481,7 @@ public class TestQueryParser extends QueryParserTestBase { MultiPhraseQuery expected = new MultiPhraseQuery(); expected.add(new Term("field", "中")); expected.add(new Term[] { new Term("field", "国"), new Term("field", "國")}); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer()); + QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer()); qp.setDefaultOperator(Operator.AND); assertEquals(expected, qp.parse("\"中国\"")); expected.setBoost(2.0f); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java index b82748d0702..4d0950c5adc 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java @@ -87,7 +87,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase { } private void checkBadQuery(String qString) { - ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); + ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer); qp.setInOrder(inOrder); Throwable expected = null; try { @@ -101,7 +101,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase { private void checkMatches(String qString, String expectedVals) throws Exception { - ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); + ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer); qp.setInOrder(inOrder); qp.setFuzzyPrefixLength(1); // usually a good idea @@ -141,7 +141,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase { } public void testHashcodeEquals() throws Exception { - ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); + ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer); qp.setInOrder(true); qp.setFuzzyPrefixLength(1); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java index 
4d6bba4cc73..a2899d8a5e5 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java @@ -48,8 +48,8 @@ public class TestExtendableQueryParser extends TestQueryParser { if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); QueryParser qp = extensions == null ? new ExtendableQueryParser( - TEST_VERSION_CURRENT, getDefaultField(), a) : new ExtendableQueryParser( - TEST_VERSION_CURRENT, getDefaultField(), a, extensions); + getDefaultField(), a) : new ExtendableQueryParser( + getDefaultField(), a, extensions); qp.setDefaultOperator(QueryParserBase.OR_OPERATOR); return qp; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 9cbf841edad..d36f6014d78 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -28,8 +28,10 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.codecs.lucene49.Lucene49Codec; @@ -257,10 +259,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { @Override protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { if (fieldName.equals("textgrams") && minPrefixChars > 0) { - return new TokenStreamComponents(components.getTokenizer(), - new EdgeNGramTokenFilter(matchVersion, - components.getTokenStream(), - 1, minPrefixChars)); + // TODO: should use an EdgeNGramTokenFilterFactory here + TokenFilter filter; + if (matchVersion.onOrAfter(Version.LUCENE_4_4)) { + filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars); + } else { + filter = new Lucene43EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars); + } + return new TokenStreamComponents(components.getTokenizer(), filter); } else { return components; } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 040258fffa3..b0f40014bc1 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -333,13 +333,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testSuggestStopFilter() throws Exception { - final CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "a"); + final CharArraySet stopWords = StopFilter.makeStopSet("a"); Analyzer indexAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { MockTokenizer tokens = new MockTokenizer(); return new 
TokenStreamComponents(tokens, - new StopFilter(TEST_VERSION_CURRENT, tokens, stopWords)); + new StopFilter(tokens, stopWords)); } }; diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java index dca3193a8cb..45883582451 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java @@ -46,7 +46,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { File tempDir = createTempDir("BlendedInfixSuggesterTest"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_LINEAR, @@ -84,7 +84,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { }; File tempDir = createTempDir("BlendedInfixSuggesterTest"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); // BlenderType.LINEAR is used by default (remove position*10%) BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a); @@ -125,7 +125,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { }; File tempDir = createTempDir("BlendedInfixSuggesterTest"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); // if factor is small, we don't get the expected element BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, @@ -175,7 +175,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { }; File tempDir = createTempDir("BlendedInfixSuggesterTest"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); // if factor is small, we don't get the expected element BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a, diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java index 6d1bfb19c42..aaf6605a5d3 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java @@ -244,8 +244,8 @@ public class TestFreeTextSuggester extends LuceneTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(); - CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of"); - return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet)); + CharArraySet stopSet = StopFilter.makeStopSet("of"); + return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet)); } }; @@ -272,8 +272,8 @@ public class TestFreeTextSuggester extends LuceneTestCase { @Override public TokenStreamComponents 
createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(); - CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of"); - return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet)); + CharArraySet stopSet = StopFilter.makeStopSet("of"); + return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet)); } }; diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java index d57a077f84a..e42342a1ee8 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet; public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testEndNotStopWord() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to"); + CharArraySet stopWords = StopFilter.makeStopSet("to"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to")); TokenStream filter = new SuggestStopFilter(stream, stopWords); @@ -47,7 +47,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testEndIsStopWord() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to"); + CharArraySet stopWords = StopFilter.makeStopSet("to"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to ")); TokenStream filter = new SuggestStopFilter(stream, stopWords); @@ -66,7 +66,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testMidStopWord() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to"); + CharArraySet stopWords = StopFilter.makeStopSet("to"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to school")); TokenStream filter = new SuggestStopFilter(stream, stopWords); @@ -86,7 +86,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testMultipleStopWords() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a"); + CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to a the school")); TokenStream filter = new SuggestStopFilter(stream, stopWords); @@ -106,7 +106,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testMultipleStopWordsEnd() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a"); + CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to a the")); TokenStream filter = new SuggestStopFilter(stream, stopWords); @@ -126,7 +126,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase { public void testMultipleStopWordsEnd2() throws Exception { - CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a"); + CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a"); Tokenizer stream = new MockTokenizer(); stream.setReader(new StringReader("go to a the ")); TokenStream filter = new SuggestStopFilter(stream, stopWords); 
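The suggest-module hunks above all make the same mechanical change: the leading Version argument disappears from StopFilter.makeStopSet and from the StopFilter constructor. A minimal, self-contained sketch of an analyzer written against the post-patch API; the class name VersionlessStopAnalyzer and the stop-word list are illustrative only, and WhitespaceTokenizer is assumed to have gained the same no-argument constructor elsewhere in this patch:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;

// Hypothetical example class, not part of this patch: shows the versionless
// constructor shapes used by the tests above.
public class VersionlessStopAnalyzer extends Analyzer {
  // was: StopFilter.makeStopSet(matchVersion, "to", "the", "a")
  private static final CharArraySet STOP_WORDS = StopFilter.makeStopSet("to", "the", "a");

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();          // was: new WhitespaceTokenizer(matchVersion)
    TokenStream sink = new StopFilter(source, STOP_WORDS); // was: new StopFilter(matchVersion, source, STOP_WORDS)
    return new TokenStreamComponents(source, sink);
  }
}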
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 80bd08ca9de..4481d56b55e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -675,7 +675,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar IndexSchema schema = core.getLatestSchema(); String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType"); FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); - Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion) + Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer() : fieldType.getQueryAnalyzer(); //TODO: There's got to be a better way! Where's Spring when you need it? queryConverter.setAnalyzer(analyzer); diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java index 1627fc7c6ab..d74bea77c4e 100644 --- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java @@ -82,7 +82,7 @@ public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory { // which is slightly inefficient to do for every instance of the managed filter // but ManagedResources don't have access to the luceneMatchVersion boolean ignoreCase = args.getBooleanArg("ignoreCase"); - stopWords = new CharArraySet(luceneMatchVersion, managedWords.size(), ignoreCase); + stopWords = new CharArraySet(managedWords.size(), ignoreCase); stopWords.addAll(managedWords); } @@ -94,6 +94,6 @@ public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory { if (stopWords == null) { throw new IllegalStateException("Managed stopwords not initialized correctly!"); } - return new StopFilter(luceneMatchVersion, input, stopWords); + return new StopFilter(input, stopWords); } } diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index ec98bc4ec71..19bf9db57ac 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -147,7 +147,7 @@ public class CollationField extends FieldType { else throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition); } - analyzer = new CollationKeyAnalyzer(Version.LUCENE_CURRENT, collator); + analyzer = new CollationKeyAnalyzer(collator); } /** diff --git a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java index 075d5a9ee02..8cd3f26890b 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java @@ -261,28 +261,20 @@ public final class FieldTypePluginLoader try { // No need to be core-aware as Analyzers are not in the core-aware list final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class); - - try { - // first try to use a ctor with version parameter - // (needed for many new Analyzers that have no default one anymore) - Constructor<? extends Analyzer> cnstr - = clazz.getConstructor(Version.class); - final String 
matchVersionStr - = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM); - final Version luceneMatchVersion = (matchVersionStr == null) ? - schema.getDefaultLuceneMatchVersion() : - Config.parseLuceneVersionString(matchVersionStr); - if (luceneMatchVersion == null) { - throw new SolrException - ( SolrException.ErrorCode.SERVER_ERROR, - "Configuration Error: Analyzer '" + clazz.getName() + - "' needs a 'luceneMatchVersion' parameter"); - } - return cnstr.newInstance(luceneMatchVersion); - } catch (NoSuchMethodException nsme) { - // otherwise use default ctor - return clazz.newInstance(); + Analyzer analyzer = clazz.newInstance(); + + final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM); + final Version luceneMatchVersion = (matchVersionStr == null) ? + schema.getDefaultLuceneMatchVersion() : + Config.parseLuceneVersionString(matchVersionStr); + if (luceneMatchVersion == null) { + throw new SolrException + ( SolrException.ErrorCode.SERVER_ERROR, + "Configuration Error: Analyzer '" + clazz.getName() + + "' needs a 'luceneMatchVersion' parameter"); } + analyzer.setVersion(luceneMatchVersion); + return analyzer; } catch (Exception e) { log.error("Cannot load analyzer: "+analyzerName, e); throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java index 0cf9bc6b771..7d0165f130b 100644 --- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -88,7 +88,7 @@ class ComplexPhraseQParser extends QParser { defaultField = getReq().getSchema().getDefaultSearchFieldName(); } - lparser = new ComplexPhraseQueryParser(getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, getReq().getSchema().getQueryAnalyzer()); + lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()); if (localParams != null) inOrder = localParams.getBool("inOrder", inOrder); diff --git a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java index 01f01a39de9..ddde5004654 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java @@ -74,7 +74,7 @@ public abstract class SolrSpellChecker { analyzer = fieldType.getQueryAnalyzer(); } if (analyzer == null) { - analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion); + analyzer = new WhitespaceAnalyzer(); } return name; } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml index ae157769051..ff90e0dfe4c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml @@ -37,11 +37,11 @@ - - + + - - + + @@ -49,8 +49,8 @@ - - + + diff --git a/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java b/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java index 3ae0dc2db47..e755f3c8fa1 100644 --- a/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java +++ b/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java @@ -16,14 +16,12 @@ */ package org.apache.solr.analysis; -import 
java.lang.reflect.Field; - import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.Config; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.FieldType; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.util.Version; import org.junit.BeforeClass; @@ -56,18 +54,15 @@ public class TestLuceneMatchVersion extends SolrTestCaseJ4 { assertEquals(Version.LUCENE_4_0, (ana.getTokenizerFactory()).getLuceneMatchVersion()); assertEquals(Version.LUCENE_5_0, (ana.getTokenFilterFactories()[2]).getLuceneMatchVersion()); - // this is a hack to get the private matchVersion field in StandardAnalyzer's superclass, may break in later lucene versions - we have no getter :( - final Field matchVersionField = StandardAnalyzer.class.getSuperclass().getDeclaredField("matchVersion"); - matchVersionField.setAccessible(true); - - type = schema.getFieldType("textStandardAnalyzerDefault"); + type = schema.getFieldType("textTurkishAnalyzerDefault"); Analyzer ana1 = type.getIndexAnalyzer(); - assertTrue(ana1 instanceof StandardAnalyzer); - assertEquals(DEFAULT_VERSION, matchVersionField.get(ana1)); + assertTrue(ana1 instanceof TurkishAnalyzer); + System.out.println("DEFAULT_VERSION = " + ana1.getVersion().name()); + assertEquals(DEFAULT_VERSION, ana1.getVersion()); - type = schema.getFieldType("textStandardAnalyzer40"); + type = schema.getFieldType("textTurkishAnalyzer40"); ana1 = type.getIndexAnalyzer(); - assertTrue(ana1 instanceof StandardAnalyzer); - assertEquals(Version.LUCENE_4_0, matchVersionField.get(ana1)); + assertTrue(ana1 instanceof TurkishAnalyzer); + assertEquals(Version.LUCENE_4_0, ana1.getVersion()); } } diff --git a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java index 7bbc46c2824..0cba95bc851 100644 --- a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java +++ b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java @@ -115,7 +115,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ Directory dir = newFSDirectory(newDir); IndexWriter iw = new IndexWriter( dir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)) + new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer()) ); Document doc = new Document(); doc.add(new TextField("id", "2", Field.Store.YES)); diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index 2b84608130f..c0a838cbc8a 100644 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -172,7 +172,7 @@ public class HighlighterTest extends SolrTestCaseJ4 { @Test public void testTermOffsetsTokenStream() throws Exception { String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" }; - Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a1 = new WhitespaceAnalyzer(); TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n"); tokenStream.reset(); @@ -180,7 +180,7 @@ public class HighlighterTest extends SolrTestCaseJ4 { tokenStream); for( String v : multivalued ){ TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() ); - Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a2 = new WhitespaceAnalyzer(); 
TokenStream ts2 = a2.tokenStream("", v); ts2.reset(); diff --git a/solr/core/src/test/org/apache/solr/search/TestSort.java b/solr/core/src/test/org/apache/solr/search/TestSort.java index d1a909bdde6..601ea5faa22 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSort.java +++ b/solr/core/src/test/org/apache/solr/search/TestSort.java @@ -189,7 +189,7 @@ public class TestSort extends SolrTestCaseJ4 { for (int iterCnt = 0; iterCnt convert(String origQuery) { Collection<Token> result = new HashSet<>(); - WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT); + WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); try (TokenStream ts = analyzer.tokenStream("", origQuery)) { // TODO: support custom attributes diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java index bccdbbcf56b..7f1cd737c54 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java @@ -40,7 +40,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase { public void test() throws Exception { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); - converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + converter.setAnalyzer(new WhitespaceAnalyzer()); Collection<Token> tokens = converter.convert("field:foo"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertTrue("tokens Size: " + tokens.size() + " is not: " + 1, tokens.size() == 1); @@ -50,7 +50,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase { public void testSpecialChars() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); - converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + converter.setAnalyzer(new WhitespaceAnalyzer()); String original = "field_with_underscore:value_with_underscore"; Collection<Token> tokens = converter.convert(original); assertTrue("tokens is null and it shouldn't be", tokens != null); @@ -96,7 +96,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase { public void testUnicode() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); - converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + converter.setAnalyzer(new WhitespaceAnalyzer()); // chinese text value Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。"); @@ -116,7 +116,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase { public void testMultipleClauses() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); - converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + converter.setAnalyzer(new WhitespaceAnalyzer()); // two field:value pairs should give two tokens Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar"); @@ -133,7 +133,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase { public void testRequiredOrProhibitedFlags() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); - converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + converter.setAnalyzer(new WhitespaceAnalyzer()); { List<Token> tokens = new ArrayList<>(converter.convert("aaa bbb ccc")); diff --git 
a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java index fb5c2b477fc..2fb2423f859 100644 --- a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java +++ b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java @@ -52,8 +52,8 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase { Tokenizer tokenizer = new KeywordTokenizer(); TokenStream filter = new PatternReplaceFilter(tokenizer, Pattern.compile("([^\\p{L}\\p{M}\\p{N}\\p{Cs}]*[\\p{L}\\p{M}\\p{N}\\p{Cs}\\_]+:)|([^\\p{L}\\p{M}\\p{N}\\p{Cs}])+"), " ", true); - filter = new LowerCaseFilter(TEST_VERSION_CURRENT, filter); - filter = new TrimFilter(TEST_VERSION_CURRENT, filter); + filter = new LowerCaseFilter(filter); + filter = new TrimFilter(filter); return new TokenStreamComponents(tokenizer, filter); } });
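The Solr-side hunks converge on a single replacement pattern for the removed Version constructors: build the Analyzer with its default constructor and, when a luceneMatchVersion is configured, apply it afterwards through Analyzer#setVersion, as FieldTypePluginLoader and TestLuceneMatchVersion now do. A minimal sketch of that pattern, reusing the TurkishAnalyzer referenced in the updated test; the wrapper class and the concrete Version value are illustrative only:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.util.Version;

// Hypothetical example class, not part of this patch.
public class SetVersionSketch {
  public static void main(String[] args) {
    Analyzer analyzer = new TurkishAnalyzer(); // was: new TurkishAnalyzer(Version.LUCENE_4_0)
    analyzer.setVersion(Version.LUCENE_4_0);   // back-compat behavior is now applied after construction
    System.out.println(analyzer.getVersion()); // the version whose analysis behavior will be emulated
  }
}

One caveat worth noting, though not verified here: since analyzers reuse their cached token stream components, setVersion presumably has to be called before the analyzer produces its first TokenStream in order to take effect.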