From e6d29d223b14778cf22682268539534160458089 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Thu, 31 Jul 2014 10:44:39 +0000
Subject: [PATCH] LUCENE-5859: Literally add back dead code to please a bunch of fucking babies

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1614852 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/analysis/ar/ArabicAnalyzer.java | 26 +++++----
 .../lucene/analysis/bg/BulgarianAnalyzer.java | 27 +++++-----
 .../lucene/analysis/br/BrazilianAnalyzer.java | 30 ++++++-----
 .../lucene/analysis/ca/CatalanAnalyzer.java | 28 +++++-----
 .../charfilter/HTMLStripCharFilter.java | 5 +-
 .../charfilter/HTMLStripCharFilter.jflex | 3 +-
 .../lucene/analysis/cjk/CJKAnalyzer.java | 19 ++++---
 .../lucene/analysis/ckb/SoraniAnalyzer.java | 28 +++++-----
 .../commongrams/CommonGramsFilter.java | 2 +-
 .../commongrams/CommonGramsFilterFactory.java | 2 +-
 .../lucene/analysis/core/KeywordAnalyzer.java | 2 +
 .../analysis/core/KeywordTokenizer.java | 2 +
 .../core/KeywordTokenizerFactory.java | 1 +
 .../lucene/analysis/core/LetterTokenizer.java | 23 ++++++--
 .../analysis/core/LetterTokenizerFactory.java | 3 +-
 .../lucene/analysis/core/LowerCaseFilter.java | 13 ++++-
 .../analysis/core/LowerCaseFilterFactory.java | 3 +-
 .../analysis/core/LowerCaseTokenizer.java | 28 ++++++++--
 .../core/LowerCaseTokenizerFactory.java | 3 +-
 .../lucene/analysis/core/SimpleAnalyzer.java | 20 ++++++-
 .../lucene/analysis/core/StopAnalyzer.java | 48 +++++++++++------
 .../lucene/analysis/core/StopFilter.java | 43 ++++++++++-----
 .../analysis/core/StopFilterFactory.java | 5 +-
 .../lucene/analysis/core/TypeTokenFilter.java | 11 ++--
 .../analysis/core/TypeTokenFilterFactory.java | 2 +-
 .../lucene/analysis/core/UpperCaseFilter.java | 10 +++-
 .../analysis/core/UpperCaseFilterFactory.java | 3 +-
 .../analysis/core/WhitespaceAnalyzer.java | 21 +++++++-
 .../analysis/core/WhitespaceTokenizer.java | 29 ++++++++--
 .../core/WhitespaceTokenizerFactory.java | 4 +-
 .../lucene/analysis/cz/CzechAnalyzer.java | 31 ++++++-----
 .../lucene/analysis/da/DanishAnalyzer.java | 28 +++++-----
 .../lucene/analysis/de/GermanAnalyzer.java | 28 +++++-----
 .../lucene/analysis/el/GreekAnalyzer.java | 18 ++++---
 .../analysis/el/GreekLowerCaseFilter.java | 14 ++++-
 .../el/GreekLowerCaseFilterFactory.java | 3 +-
 .../lucene/analysis/el/GreekStemmer.java | 33 ++++++------
 .../lucene/analysis/en/EnglishAnalyzer.java | 28 +++++-----
 .../analysis/en/EnglishPossessiveFilter.java | 4 +-
 .../en/EnglishPossessiveFilterFactory.java | 3 +-
 .../apache/lucene/analysis/en/KStemmer.java | 3 +-
 .../lucene/analysis/es/SpanishAnalyzer.java | 28 +++++-----
 .../lucene/analysis/eu/BasqueAnalyzer.java | 26 +++++----
 .../lucene/analysis/fa/PersianAnalyzer.java | 17 +++---
 .../lucene/analysis/fi/FinnishAnalyzer.java | 28 +++++-----
 .../lucene/analysis/fr/FrenchAnalyzer.java | 30 ++++++-----
 .../lucene/analysis/ga/IrishAnalyzer.java | 30 ++++++-----
 .../lucene/analysis/gl/GalicianAnalyzer.java | 28 +++++-----
 .../lucene/analysis/hi/HindiAnalyzer.java | 24 +++++----
 .../lucene/analysis/hu/HungarianAnalyzer.java | 28 +++++-----
 .../lucene/analysis/hunspell/Stemmer.java | 2 +-
 .../lucene/analysis/hy/ArmenianAnalyzer.java | 26 +++++----
 .../analysis/id/IndonesianAnalyzer.java | 28 ++++++----
 .../lucene/analysis/it/ItalianAnalyzer.java | 30 ++++++-----
 .../lucene/analysis/lv/LatvianAnalyzer.java | 28 +++++-----
 .../CapitalizationFilterFactory.java | 2 +-
 .../miscellaneous/CodepointCountFilter.java | 6 ++-
 .../CodepointCountFilterFactory.java | 2 +-
 .../miscellaneous/KeepWordFilter.java | 6 ++-
 .../miscellaneous/KeepWordFilterFactory.java | 3 +-
 .../analysis/miscellaneous/LengthFilter.java | 6 ++-
 .../miscellaneous/LengthFilterFactory.java | 2 +-
 .../RemoveDuplicatesTokenFilter.java | 4 +-
 .../analysis/miscellaneous/TrimFilter.java | 7 ++-
 .../miscellaneous/TrimFilterFactory.java | 2 +-
 .../analysis/ngram/EdgeNGramTokenFilter.java | 2 +-
 .../analysis/ngram/NGramTokenFilter.java | 4 +-
 .../lucene/analysis/ngram/NGramTokenizer.java | 2 +-
 .../lucene/analysis/nl/DutchAnalyzer.java | 38 +++++++------
 .../lucene/analysis/no/NorwegianAnalyzer.java | 28 +++++-----
 .../analysis/pt/PortugueseAnalyzer.java | 28 +++++-----
 .../lucene/analysis/pt/RSLPStemmerBase.java | 4 +-
 .../query/QueryAutoStopWordAnalyzer.java | 25 ++++++---
 .../analysis/reverse/ReverseStringFilter.java | 32 +++++++----
 .../reverse/ReverseStringFilterFactory.java | 3 +-
 .../lucene/analysis/ro/RomanianAnalyzer.java | 25 +++++----
 .../lucene/analysis/ru/RussianAnalyzer.java | 28 +++++-----
 .../shingle/ShingleAnalyzerWrapper.java | 9 ++--
 .../analysis/standard/ClassicAnalyzer.java | 42 +++++++++++----
 .../analysis/standard/ClassicTokenizer.java | 13 +++--
 .../standard/ClassicTokenizerFactory.java | 3 +-
 .../analysis/standard/StandardAnalyzer.java | 54 ++++++++++++++-----
 .../analysis/standard/StandardFilter.java | 3 +-
 .../standard/StandardFilterFactory.java | 3 +-
 .../analysis/standard/StandardTokenizer.java | 13 +++--
 .../standard/StandardTokenizerFactory.java | 3 +-
 .../standard/UAX29URLEmailAnalyzer.java | 36 ++++++++-----
 .../standard/UAX29URLEmailTokenizer.java | 15 ++++--
 .../UAX29URLEmailTokenizerFactory.java | 3 +-
 .../lucene/analysis/sv/SwedishAnalyzer.java | 28 +++++-----
 .../synonym/SynonymFilterFactory.java | 11 ++--
 .../lucene/analysis/th/ThaiAnalyzer.java | 19 ++++---
 .../lucene/analysis/th/ThaiWordFilter.java | 3 +-
 .../analysis/th/ThaiWordFilterFactory.java | 3 +-
 .../lucene/analysis/tr/TurkishAnalyzer.java | 13 +++--
 .../util/AbstractAnalysisFactory.java | 9 ++--
 .../lucene/analysis/util/CharArrayMap.java | 53 ++++++++++++++----
 .../lucene/analysis/util/CharArraySet.java | 46 +++++++++++++---
 .../lucene/analysis/util/CharTokenizer.java | 16 ++++--
 .../lucene/analysis/util/CharacterUtils.java | 27 +++++++---
 .../analysis/util/FilteringTokenFilter.java | 5 +-
 .../analysis/util/StopwordAnalyzerBase.java | 34 ++++++++----
 .../lucene/analysis/util/WordlistLoader.java | 16 +++---
 .../analysis/ar/TestArabicAnalyzer.java | 20 +++----
 .../analysis/ar/TestArabicStemFilter.java | 2 +-
 .../analysis/bg/TestBulgarianAnalyzer.java | 16 +++---
 .../analysis/bg/TestBulgarianStemmer.java | 12 ++---
 .../analysis/br/TestBrazilianStemmer.java | 14 ++---
 .../analysis/ca/TestCatalanAnalyzer.java | 13 ++---
 .../lucene/analysis/cjk/TestCJKAnalyzer.java | 10 ++--
 .../analysis/cjk/TestCJKBigramFilter.java | 10 ++--
 .../analysis/ckb/TestSoraniAnalyzer.java | 14 ++---
 .../analysis/ckb/TestSoraniStemFilter.java | 2 +-
 .../commongrams/CommonGramsFilterTest.java | 31 +++++------
 .../compound/TestCompoundWordTokenFilter.java | 2 +-
 .../lucene/analysis/core/TestAnalyzers.java | 39 +++++++-------
 .../analysis/core/TestBugInSomething.java | 6 +--
 .../analysis/core/TestClassicAnalyzer.java | 23 ++++----
 .../analysis/core/TestDuelingAnalyzers.java | 12 ++---
 .../analysis/core/TestKeywordAnalyzer.java | 7 +--
 .../analysis/core/TestRandomChains.java | 6 +--
 .../analysis/core/TestStandardAnalyzer.java | 10 ++--
 .../analysis/core/TestStopAnalyzer.java | 10 ++--
 .../lucene/analysis/core/TestStopFilter.java | 24 ++++-----
 .../analysis/core/TestTypeTokenFilter.java | 12 ++---
 .../core/TestUAX29URLEmailAnalyzer.java | 4 +-
 .../core/TestUAX29URLEmailTokenizer.java | 8 +--
 .../lucene/analysis/cz/TestCzechAnalyzer.java | 11 ++--
 .../lucene/analysis/cz/TestCzechStemmer.java | 16 +++---
 .../analysis/da/TestDanishAnalyzer.java | 10 ++--
 .../analysis/de/TestGermanAnalyzer.java | 14 ++---
 .../de/TestGermanLightStemFilter.java | 2 +-
 .../de/TestGermanMinimalStemFilter.java | 2 +-
 .../analysis/de/TestGermanStemFilter.java | 4 +-
 .../lucene/analysis/el/GreekAnalyzerTest.java | 6 +--
 .../lucene/analysis/el/TestGreekStemmer.java | 2 +-
 .../analysis/en/TestEnglishAnalyzer.java | 10 ++--
 .../analysis/en/TestPorterStemFilter.java | 2 +-
 .../analysis/es/TestSpanishAnalyzer.java | 10 ++--
 .../analysis/eu/TestBasqueAnalyzer.java | 10 ++--
 .../analysis/fa/TestPersianAnalyzer.java | 18 +++----
 .../analysis/fi/TestFinnishAnalyzer.java | 10 ++--
 .../fi/TestFinnishLightStemFilter.java | 2 +-
 .../analysis/fr/TestFrenchAnalyzer.java | 18 +++----
 .../fr/TestFrenchLightStemFilter.java | 2 +-
 .../fr/TestFrenchMinimalStemFilter.java | 2 +-
 .../lucene/analysis/ga/TestIrishAnalyzer.java | 14 ++---
 .../analysis/gl/TestGalicianAnalyzer.java | 10 ++--
 .../gl/TestGalicianMinimalStemFilter.java | 2 +-
 .../lucene/analysis/hi/TestHindiAnalyzer.java | 10 ++--
 .../analysis/hu/TestHungarianAnalyzer.java | 10 ++--
 .../hu/TestHungarianLightStemFilter.java | 2 +-
 .../hunspell/TestHunspellStemFilter.java | 2 +-
 .../analysis/hy/TestArmenianAnalyzer.java | 10 ++--
 .../analysis/id/TestIndonesianAnalyzer.java | 10 ++--
 .../analysis/it/TestItalianAnalyzer.java | 12 ++---
 .../analysis/lv/TestLatvianAnalyzer.java | 10 ++--
 .../TestCapitalizationFilter.java | 2 +-
 .../TestCodepointCountFilter.java | 8 +--
 .../miscellaneous/TestKeepWordFilter.java | 6 +--
 .../TestKeywordMarkerFilter.java | 10 ++--
 .../miscellaneous/TestLengthFilter.java | 6 +--
 .../TestLucene47WordDelimiterFilter.java | 9 ++--
 .../TestPerFieldAnalyzerWrapper.java | 8 +--
 .../TestStemmerOverrideFilter.java | 2 +-
 .../miscellaneous/TestTrimFilter.java | 7 +--
 .../TestWordDelimiterFilter.java | 11 ++--
 .../ngram/EdgeNGramTokenFilterTest.java | 4 +-
 .../analysis/ngram/NGramTokenFilterTest.java | 2 +-
 .../lucene/analysis/nl/TestDutchStemmer.java | 20 +++----
 .../analysis/no/TestNorwegianAnalyzer.java | 10 ++--
 .../no/TestNorwegianLightStemFilter.java | 2 +-
 .../no/TestNorwegianMinimalStemFilter.java | 2 +-
 .../analysis/pt/TestPortugueseAnalyzer.java | 10 ++--
 .../pt/TestPortugueseLightStemFilter.java | 2 +-
 .../pt/TestPortugueseMinimalStemFilter.java | 2 +-
 .../analysis/pt/TestPortugueseStemFilter.java | 2 +-
 .../query/QueryAutoStopWordAnalyzerTest.java | 19 +++----
 .../reverse/TestReverseStringFilter.java | 36 ++++++-------
 .../analysis/ro/TestRomanianAnalyzer.java | 10 ++--
 .../analysis/ru/TestRussianAnalyzer.java | 10 ++--
 .../ru/TestRussianLightStemFilter.java | 2 +-
 .../shingle/ShingleAnalyzerWrapperTest.java | 4 +-
 .../analysis/shingle/ShingleFilterTest.java | 2 +-
 .../sinks/TestTeeSinkTokenFilter.java | 14 ++---
 .../analysis/sv/TestSwedishAnalyzer.java | 10 ++--
 .../sv/TestSwedishLightStemFilter.java | 2 +-
 .../synonym/TestSolrSynonymParser.java | 4 +-
 .../analysis/tr/TestTurkishAnalyzer.java | 2 +-
 .../analysis/util/TestCharArrayMap.java | 8 +--
 .../analysis/util/TestCharArraySet.java | 50 ++++++++---------
 .../analysis/util/TestCharTokenizers.java | 12 ++---
 .../analysis/util/TestCharacterUtils.java | 14 ++---
 .../lucene/analysis/util/TestElision.java | 4 +-
 .../util/TestFilesystemResourceLoader.java | 3 +-
 .../analysis/util/TestWordlistLoader.java | 8 +--
 .../segmentation/TestWithCJKBigramFilter.java | 5 +-
 .../lucene/analysis/ja/JapaneseAnalyzer.java | 16 +++---
 .../ja/JapanesePartOfSpeechStopFilter.java | 6 ++-
 ...JapanesePartOfSpeechStopFilterFactory.java | 2 +-
 .../analysis/ja/TestJapaneseAnalyzer.java | 24 ++++-----
 .../ja/TestJapaneseBaseFormFilter.java | 3 +-
 .../ja/TestJapaneseKatakanaStemFilter.java | 3 +-
 .../morfologik/MorfologikAnalyzer.java | 16 +++---
 .../analysis/morfologik/MorfologikFilter.java | 10 ++--
 .../morfologik/MorfologikFilterFactory.java | 2 +-
 .../morfologik/TestMorfologikAnalyzer.java | 15 +++---
 .../analysis/phonetic/TestPhoneticFilter.java | 4 +-
 .../cn/smart/SmartChineseAnalyzer.java | 5 +-
 .../lucene/analysis/pl/PolishAnalyzer.java | 28 +++++-----
 .../analysis/pl/TestPolishAnalyzer.java | 11 ++--
 .../byTask/feeds/EnwikiQueryMaker.java | 2 +-
 .../byTask/feeds/FileBasedQueryMaker.java | 2 +-
 .../byTask/feeds/LongToEnglishQueryMaker.java | 2 +-
 .../byTask/feeds/ReutersQueryMaker.java | 2 +-
 .../byTask/feeds/SimpleQueryMaker.java | 2 +-
 .../quality/utils/SimpleQQParser.java | 2 +-
 .../SimpleNaiveBayesClassifierTest.java | 2 +-
 .../org/apache/lucene/demo/IndexFiles.java | 2 +-
 .../org/apache/lucene/demo/SearchFiles.java | 7 ++-
 .../demo/facet/AssociationsFacetsExample.java | 2 +-
 .../demo/facet/DistanceFacetsExample.java | 2 +-
 .../ExpressionAggregationFacetsExample.java | 2 +-
 .../MultiCategoryListsFacetsExample.java | 2 +-
 .../lucene/demo/facet/RangeFacetsExample.java | 2 +-
 .../demo/facet/SimpleFacetsExample.java | 2 +-
 .../facet/SimpleSortedSetFacetsExample.java | 2 +-
 .../demo/xmlparser/FormBasedXmlQueryDemo.java | 2 +-
 .../lucene/index/memory/MemoryIndexTest.java | 2 +-
 .../analyzing/AnalyzingQueryParser.java | 5 +-
 .../classic/MultiFieldQueryParser.java | 26 +++++----
 .../queryparser/classic/QueryParser.java | 14 ++++-
 .../lucene/queryparser/classic/QueryParser.jj | 14 ++++-
 .../queryparser/classic/QueryParserBase.java | 4 +-
 .../ComplexPhraseQueryParser.java | 5 +-
 .../ext/ExtendableQueryParser.java | 14 +++--
 .../xml/builders/UserInputQueryBuilder.java | 2 +-
 .../analyzing/TestAnalyzingQueryParser.java | 6 +--
 .../classic/TestMultiAnalyzer.java | 6 +--
 .../classic/TestMultiFieldQueryParser.java | 48 ++++++++---------
 .../classic/TestMultiPhraseQueryParsing.java | 2 +-
 .../queryparser/classic/TestQueryParser.java | 29 +++++-----
 .../complexPhrase/TestComplexPhraseQuery.java | 6 +--
 .../ext/TestExtendableQueryParser.java | 4 +-
 .../AnalyzingInfixSuggesterTest.java | 4 +-
 .../analyzing/BlendedInfixSuggesterTest.java | 8 +--
 .../analyzing/TestFreeTextSuggester.java | 8 +--
 .../analyzing/TestSuggestStopFilter.java | 12 ++---
 .../component/SpellCheckComponent.java | 2 +-
 .../analysis/ManagedStopFilterFactory.java | 4 +-
 .../search/ComplexPhraseQParserPlugin.java | 2 +-
 .../solr/spelling/SolrSpellChecker.java | 2 +-
 .../conf/schema-luceneMatchVersion.xml | 12 ++---
 .../solr/analysis/TestLuceneMatchVersion.java | 14 ++---
 .../solr/core/TestArbitraryIndexDir.java | 2 +-
 .../solr/highlight/HighlighterTest.java | 4 +-
 .../test/org/apache/solr/search/TestSort.java | 2 +-
 .../spelling/IndexBasedSpellCheckerTest.java | 2 +-
 .../solr/spelling/SimpleQueryConverter.java | 2 +-
 .../spelling/SpellingQueryConverterTest.java | 10 ++--
 .../TestSuggestSpellingConverter.java | 4 +-
 261 files changed, 1840 insertions(+), 1248 deletions(-)
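Every hunk below follows the same mechanical recipe: analyzers, tokenizers, and filters regain an explicit org.apache.lucene.util.Version argument (and the factories regain assureMatchVersion() checks), so callers pin analysis behavior to an index-format version instead of silently tracking whatever the current jar implements. As a sketch of the restored calling convention (the LUCENE_4_9 constant is illustrative and not taken from this patch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.ar.ArabicAnalyzer;
    import org.apache.lucene.util.Version;

    public class MatchVersionSketch {
      public static void main(String[] args) {
        // Analysis behavior is pinned at construction time: tokenization
        // stays on the 4.9 rules even when a newer jar is dropped in.
        Analyzer analyzer = new ArabicAnalyzer(Version.LUCENE_4_9);
        analyzer.close();
      }
    }
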
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
index 4dea7abbc49..39e5a087f2c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
 
 /**
  * {@link Analyzer} for Arabic.
@@ -88,18 +89,20 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
    */
-  public ArabicAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public ArabicAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words
    * 
+   * @param matchVersion
+   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    */
-  public ArabicAnalyzer(CharArraySet stopwords){
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -107,14 +110,17 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
    * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * {@link ArabicStemFilter}.
    * 
+   * @param matchVersion
+   *          lucene compatibility version
    * @param stopwords
    *          a stopword set
    * @param stemExclusionSet
    *          a set of terms not to be stemmed
    */
-  public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
   }
 
   /**
@@ -130,10 +136,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new LowerCaseFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
     // the order here is important: the stopword list is not normalized!
-    result = new StopFilter(result, stopwords);
+    result = new StopFilter( matchVersion, result, stopwords);
     // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
result = new ArabicNormalizationFilter(result); if(!stemExclusionSet.isEmpty()) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java index 76e6ca05fc2..ffb9aed1b59 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.bg; import java.io.IOException; import java.io.Reader; +import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -30,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Bulgarian. @@ -40,7 +42,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase; *

*/ public final class BulgarianAnalyzer extends StopwordAnalyzerBase { - /** * File containing default Bulgarian stopwords. * @@ -83,15 +84,15 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase { * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. */ - public BulgarianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public BulgarianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. */ - public BulgarianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -99,10 +100,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase { * If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter} * before {@link BulgarianStemFilter}. */ - public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); - } + public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** * Creates a @@ -118,10 +119,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase { */ @Override public TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new BulgarianStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java index 4a36c9699af..cddd3920c24 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java @@ -65,7 +65,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#"); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -83,29 +83,35 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}). 
*/ - public BrazilianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public BrazilianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public BrazilianAnalyzer(CharArraySet stopwords) { - super(stopwords); + public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) { + super(matchVersion, stopwords); } /** * Builds an analyzer with the given stop words and stemming exclusion words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - this(stopwords); - excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, + CharArraySet stemExclusionSet) { + this(matchVersion, stopwords); + excltable = CharArraySet.unmodifiableSet(CharArraySet + .copy(matchVersion, stemExclusionSet)); } /** @@ -120,10 +126,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(); - TokenStream result = new LowerCaseFilter(source); - result = new StandardFilter(result); - result = new StopFilter(result, stopwords); + Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new LowerCaseFilter(matchVersion, source); + result = new StandardFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(excltable != null && !excltable.isEmpty()) result = new SetKeywordMarkerFilter(result, excltable); return new TokenStreamComponents(source, new BrazilianStemFilter(result)); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java index 61ca46bb8a1..342348204a8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.CatalanStemmer; /** @@ -45,7 +46,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase { public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet( + new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "d", "l", "m", "n", "s", "t" ), true)); @@ -80,17 +81,18 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public CatalanAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public CatalanAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. 
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public CatalanAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -98,12 +100,14 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -120,11 +124,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); result = new ElisionFilter(result, DEFAULT_ARTICLES); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new CatalanStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java index ad304545195..b10e1797863 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; +import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.OpenStringBuilder; @@ -29840,7 +29841,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { upperCaseVariantsAccepted.put("amp", "AMP"); } private static final CharArrayMap entityValues - = new CharArrayMap<>(253, false); + = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false); static { String[] entities = { "AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2", @@ -29979,7 +29980,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter { escapeSTYLE = true; } else { if (null == this.escapedTags) { - this.escapedTags = new CharArraySet(16, true); + this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true); } this.escapedTags.add(tag); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex index 8c34577d8a6..4ec0785f6f3 
100755 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; +import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.OpenStringBuilder; @@ -194,7 +195,7 @@ InlineElment = ( [aAbBiIqQsSuU] | escapeSTYLE = true; } else { if (null == this.escapedTags) { - this.escapedTags = new CharArraySet(16, true); + this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true); } this.escapedTags.add(tag); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java index dda8e939d17..958974c0dda 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk; */ import java.io.IOException; +import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -27,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; /** * An {@link Analyzer} that tokenizes text with {@link StandardTokenizer}, @@ -35,7 +37,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase; * and filters stopwords with {@link StopFilter} */ public final class CJKAnalyzer extends StopwordAnalyzerBase { - /** * File containing default CJK stopwords. *

@@ -69,27 +70,29 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer which removes words in {@link #getDefaultStopSet()}. */ - public CJKAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public CJKAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public CJKAnalyzer(CharArraySet stopwords){ - super(stopwords); + public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){ + super(matchVersion, stopwords); } @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); + final Tokenizer source = new StandardTokenizer(matchVersion); // run the widthfilter first before bigramming, it sometimes combines characters. TokenStream result = new CJKWidthFilter(source); - result = new LowerCaseFilter(result); + result = new LowerCaseFilter(matchVersion, result); result = new CJKBigramFilter(result); - return new TokenStreamComponents(source, new StopFilter(result, stopwords)); + return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords)); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java index edee99c5636..8a89ae5a978 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Sorani Kurdish. @@ -61,7 +62,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -73,17 +74,18 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public SoraniAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public SoraniAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public SoraniAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -91,12 +93,14 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -114,11 +118,11 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); result = new SoraniNormalizationFilter(result); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SoraniStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java index 3955ecbe50a..052878028e3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java @@ -78,7 +78,7 @@ public final class CommonGramsFilter extends TokenFilter { * @param input TokenStream input in filter chain * @param commonWords The set of common words. */ - public CommonGramsFilter(TokenStream input, CharArraySet commonWords) { + public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) { super(input); this.commonWords = commonWords; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java index 82765f45f69..637568e8008 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java @@ -76,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso @Override public TokenFilter create(TokenStream input) { - CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords); + CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords); return commonGrams; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java index 6002ea99309..888930f16ac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java @@ -17,6 +17,8 @@ package org.apache.lucene.analysis.core; * limitations under the License. 
 */
 
+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
 
 /**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
index 9997d40155a..876a6160f73 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@@ -18,11 +18,13 @@ package org.apache.lucene.analysis.core;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
 
 /**
  * Emits the entire input as a single token.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
index 8c5588626f6..c29bcd50992 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;
 
+import java.io.Reader;
 import java.util.Map;
 
 /**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
index 5c0b6d2bcc9..e0437b3d467 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;
 
 /**
  * A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
@@ -29,25 +30,41 @@ import org.apache.lucene.util.AttributeFactory;
  * Note: this does a decent job for most European languages, but does a terrible
  * job for some Asian languages, where words are not separated by spaces.
  * </p>
+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LetterTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
  */
 public class LetterTokenizer extends CharTokenizer {
   
   /**
    * Construct a new LetterTokenizer.
+   * 
+   * @param matchVersion
+   *          Lucene version to match See {@link
above} */ - public LetterTokenizer() { + public LetterTokenizer(Version matchVersion) { + super(matchVersion); } /** * Construct a new LetterTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * + * @param matchVersion + * Lucene version to match See {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public LetterTokenizer(AttributeFactory factory) { - super(factory); + public LetterTokenizer(Version matchVersion, AttributeFactory factory) { + super(matchVersion, factory); } /** Collects only characters which satisfy diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java index 11dae66d2b2..4a06f3127d8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java @@ -36,6 +36,7 @@ public class LetterTokenizerFactory extends TokenizerFactory { /** Creates a new LetterTokenizerFactory */ public LetterTokenizerFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -43,6 +44,6 @@ public class LetterTokenizerFactory extends TokenizerFactory { @Override public LetterTokenizer create(AttributeFactory factory) { - return new LetterTokenizer(factory); + return new LetterTokenizer(luceneMatchVersion, factory); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java index 7a7e96898a9..1b0ffa408f9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java @@ -23,21 +23,30 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; +import org.apache.lucene.util.Version; /** * Normalizes token text to lower case. + * + *

+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating LowerCaseFilter:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
*/ public final class LowerCaseFilter extends TokenFilter { - private final CharacterUtils charUtils = CharacterUtils.getInstance(); + private final CharacterUtils charUtils; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new LowerCaseFilter, that normalizes token text to lower case. * + * @param matchVersion See
above * @param in TokenStream to filter */ - public LowerCaseFilter(TokenStream in) { + public LowerCaseFilter(Version matchVersion, TokenStream in) { super(in); + charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java index ded2966292b..244722efcc0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java @@ -40,6 +40,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT /** Creates a new LowerCaseFilterFactory */ public LowerCaseFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -47,7 +48,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT @Override public LowerCaseFilter create(TokenStream input) { - return new LowerCaseFilter(input); + return new LowerCaseFilter(luceneMatchVersion,input); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java index 66586f77154..d61e1a938d9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java @@ -17,8 +17,13 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ +import java.io.Reader; + import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** * LowerCaseTokenizer performs the function of LetterTokenizer @@ -30,24 +35,41 @@ import org.apache.lucene.util.AttributeFactory; * Note: this does a decent job for most European languages, but does a terrible * job for some Asian languages, where words are not separated by spaces. *

+ *

+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LowerCaseTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
  */
 public final class LowerCaseTokenizer extends LetterTokenizer {
   
   /**
    * Construct a new LowerCaseTokenizer.
+   * 
+   * @param matchVersion
+   *          Lucene version to match See {@link
above} + * */ - public LowerCaseTokenizer() { + public LowerCaseTokenizer(Version matchVersion) { + super(matchVersion); } /** * Construct a new LowerCaseTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * + * @param matchVersion + * Lucene version to match See {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public LowerCaseTokenizer(AttributeFactory factory) { - super(factory); + public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory) { + super(matchVersion, factory); } /** Converts char to lower case diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java index 68b3c049722..4af9a10484c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java @@ -39,6 +39,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi /** Creates a new LowerCaseTokenizerFactory */ public LowerCaseTokenizerFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -46,7 +47,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi @Override public LowerCaseTokenizer create(AttributeFactory factory) { - return new LowerCaseTokenizer(factory); + return new LowerCaseTokenizer(luceneMatchVersion, factory); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java index c63c968940e..bc9a69b7f64 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java @@ -17,22 +17,38 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ +import java.io.Reader; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.Version; /** An {@link Analyzer} that filters {@link LetterTokenizer} * with {@link LowerCaseFilter} + *

+ * You must specify the required {@link Version} compatibility
+ * when creating {@link CharTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
 **/
 public final class SimpleAnalyzer extends Analyzer {
   
+  private final Version matchVersion;
+  
   /**
    * Creates a new {@link SimpleAnalyzer}
+   * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>}
    */
-  public SimpleAnalyzer() {
+  public SimpleAnalyzer(Version matchVersion) {
+    this.matchVersion = matchVersion;
   }
   
   @Override
   protected TokenStreamComponents createComponents(final String fieldName) {
-    return new TokenStreamComponents(new LowerCaseTokenizer());
+    return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion));
   }
 }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index 102618f84be..fe85bc82e7a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -27,10 +27,20 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+ *
+ *
+ * <a name="version"/>

+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopAnalyzer:
+ *
+*/ -/** - * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. - */ public final class StopAnalyzer extends StopwordAnalyzerBase { /** An unmodifiable set containing some common English words that are not usually useful @@ -45,35 +55,40 @@ public final class StopAnalyzer extends StopwordAnalyzerBase { "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" ); - final CharArraySet stopSet = new CharArraySet(stopWords, false); + final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, + stopWords, false); ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); } /** Builds an analyzer which removes words in * {@link #ENGLISH_STOP_WORDS_SET}. + * @param matchVersion See above */ - public StopAnalyzer() { - this(ENGLISH_STOP_WORDS_SET); + public StopAnalyzer(Version matchVersion) { + this(matchVersion, ENGLISH_STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given set. + * @param matchVersion See above * @param stopWords Set of stop words */ - public StopAnalyzer(CharArraySet stopWords) { - super(stopWords); + public StopAnalyzer(Version matchVersion, CharArraySet stopWords) { + super(matchVersion, stopWords); } /** Builds an analyzer with the stop words from the given file. - * @see WordlistLoader#getWordSet(Reader) + * @see WordlistLoader#getWordSet(Reader, Version) + * @param matchVersion See above * @param stopwordsFile File to load stop words from */ - public StopAnalyzer(File stopwordsFile) throws IOException { - this(loadStopwordSet(stopwordsFile)); + public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException { + this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion)); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader) + * @see WordlistLoader#getWordSet(Reader, Version) + * @param matchVersion See above * @param stopwords Reader to load stop words from */ - public StopAnalyzer(Reader stopwords) throws IOException { - this(loadStopwordSet(stopwords)); + public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException { + this(matchVersion, loadStopwordSet(stopwords, matchVersion)); } /** @@ -87,8 +102,9 @@ public final class StopAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new LowerCaseTokenizer(); - return new TokenStreamComponents(source, new StopFilter(source, stopwords)); + final Tokenizer source = new LowerCaseTokenizer(matchVersion); + return new TokenStreamComponents(source, new StopFilter(matchVersion, + source, stopwords)); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java index 2c3f000e25d..536d253671b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java @@ -24,9 +24,19 @@ import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; /** * Removes stop words from a token stream. + * + * + *

+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopFilter:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *        supplementary characters in stopwords and position
+ *        increments are preserved
+ * </ul>
*/ public final class StopFilter extends FilteringTokenFilter { @@ -37,14 +47,17 @@ public final class StopFilter extends FilteringTokenFilter { * Constructs a filter which removes words from the input TokenStream that are * named in the Set. * + * @param matchVersion + * Lucene version to enable correct Unicode 4.0 behavior in the stop + * set if Version > 3.0. See
above for details. * @param in * Input stream * @param stopWords * A {@link CharArraySet} representing the stopwords. - * @see #makeStopSet(java.lang.String...) + * @see #makeStopSet(Version, java.lang.String...) */ - public StopFilter(TokenStream in, CharArraySet stopWords) { - super(in); + public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) { + super(matchVersion, in); this.stopWords = stopWords; } @@ -54,11 +67,12 @@ public final class StopFilter extends FilteringTokenFilter { * This permits this stopWords construction to be cached once when * an Analyzer is constructed. * + * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords An array of stopwords - * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase + * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase */ - public static CharArraySet makeStopSet(String... stopWords) { - return makeStopSet(stopWords, false); + public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) { + return makeStopSet(matchVersion, stopWords, false); } /** @@ -67,35 +81,38 @@ public final class StopFilter extends FilteringTokenFilter { * This permits this stopWords construction to be cached once when * an Analyzer is constructed. * + * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords * @return A Set ({@link CharArraySet}) containing the words - * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase + * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase */ - public static CharArraySet makeStopSet(List stopWords) { - return makeStopSet(stopWords, false); + public static CharArraySet makeStopSet(Version matchVersion, List stopWords) { + return makeStopSet(matchVersion, stopWords, false); } /** * Creates a stopword set from the given stopword array. * + * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords An array of stopwords * @param ignoreCase If true, all words are lower cased first. * @return a Set containing the words */ - public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) { - CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase); + public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) { + CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase); stopSet.addAll(Arrays.asList(stopWords)); return stopSet; } /** * Creates a stopword set from the given stopword list. 
+ * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords * @param ignoreCase if true, all words are lower cased first * @return A Set ({@link CharArraySet}) containing the words */ - public static CharArraySet makeStopSet(List stopWords, boolean ignoreCase){ - CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase); + public static CharArraySet makeStopSet(Version matchVersion, List stopWords, boolean ignoreCase){ + CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase); stopSet.addAll(stopWords); return stopSet; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java index 7bf32429b1f..5e3c7e87fef 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java @@ -81,6 +81,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa /** Creates a new StopFilterFactory */ public StopFilterFactory(Map args) { super(args); + assureMatchVersion(); stopWordFiles = get(args, "words"); format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET)); ignoreCase = getBoolean(args, "ignoreCase", false); @@ -103,7 +104,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa if (null != format) { throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format); } - stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); + stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); } } @@ -117,7 +118,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa @Override public TokenStream create(TokenStream input) { - StopFilter stopFilter = new StopFilter(input,stopWords); + StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords); return stopFilter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java index d2791dfbf95..9c6bcbab744 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.FilteringTokenFilter; +import org.apache.lucene.util.Version; /** * Removes tokens whose types appear in a set of blocked types from a token stream. @@ -34,13 +35,14 @@ public final class TypeTokenFilter extends FilteringTokenFilter { /** * Create a new {@link TypeTokenFilter}. 
+ * @param version the Lucene match version * @param input the {@link TokenStream} to consume * @param stopTypes the types to filter * @param useWhiteList if true, then tokens whose type is in stopTypes will * be kept, otherwise they will be filtered out */ - public TypeTokenFilter(TokenStream input, Set stopTypes, boolean useWhiteList) { - super(input); + public TypeTokenFilter(Version version, TokenStream input, Set stopTypes, boolean useWhiteList) { + super(version, input); this.stopTypes = stopTypes; this.useWhiteList = useWhiteList; } @@ -48,9 +50,10 @@ public final class TypeTokenFilter extends FilteringTokenFilter { /** * Create a new {@link TypeTokenFilter} that filters tokens out * (useWhiteList=false). + * @see #TypeTokenFilter(Version, TokenStream, Set, boolean) */ - public TypeTokenFilter(TokenStream input, Set stopTypes) { - this(input, stopTypes, false); + public TypeTokenFilter(Version version, TokenStream input, Set stopTypes) { + this(version, input, stopTypes, false); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java index 089ef7adb83..0545d754133 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java @@ -72,7 +72,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour @Override public TokenStream create(TokenStream input) { - final TokenStream filter = new TypeTokenFilter(input, stopTypes, useWhitelist); + final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java index 6fdae1b685a..2625d4f5ebf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java @@ -23,9 +23,13 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; +import org.apache.lucene.util.Version; /** * Normalizes token text to UPPER CASE. + * + *
You must specify the required {@link Version} + * compatibility when creating UpperCaseFilter + * + *
NOTE: In Unicode, this transformation may lose information when the * upper case character represents more than one lower case character. Use this filter @@ -33,16 +37,18 @@ import org.apache.lucene.analysis.util.CharacterUtils; * general search matching */ public final class UpperCaseFilter extends TokenFilter { - private final CharacterUtils charUtils = CharacterUtils.getInstance(); + private final CharacterUtils charUtils; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new UpperCaseFilter, that normalizes token text to upper case. * + * @param matchVersion See above * @param in TokenStream to filter */ - public UpperCaseFilter(TokenStream in) { + public UpperCaseFilter(Version matchVersion, TokenStream in) { super(in); + charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java index ac97ad7bd0a..60f1119405a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java @@ -45,6 +45,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT /** Creates a new UpperCaseFilterFactory */ public UpperCaseFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -52,7 +53,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT @Override public UpperCaseFilter create(TokenStream input) { - return new UpperCaseFilter(input); + return new UpperCaseFilter(luceneMatchVersion,input); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java index 855f4f6a88c..2fdc3f3dcc3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java @@ -17,21 +17,38 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ +import java.io.Reader; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.util.CharTokenizer; +import org.apache.lucene.util.Version; /** * An Analyzer that uses {@link WhitespaceTokenizer}. + *
+ * You must specify the required {@link Version} compatibility + * when creating {@link CharTokenizer}: + *
**/ public final class WhitespaceAnalyzer extends Analyzer { + private final Version matchVersion; + /** * Creates a new {@link WhitespaceAnalyzer} + * @param matchVersion Lucene version to match See {@link above} */ - public WhitespaceAnalyzer() { + public WhitespaceAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; } @Override protected TokenStreamComponents createComponents(final String fieldName) { - return new TokenStreamComponents(new WhitespaceTokenizer()); + return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion)); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java index f38b07aed64..354322c444d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java @@ -17,31 +17,50 @@ package org.apache.lucene.analysis.core; * limitations under the License. */ +import java.io.Reader; + import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** * A WhitespaceTokenizer is a tokenizer that divides text at whitespace. - * Adjacent sequences of non-Whitespace characters form tokens. + * Adjacent sequences of non-Whitespace characters form tokens. + *
+ * You must specify the required {@link Version} compatibility when creating + * {@link WhitespaceTokenizer}: +
*/ public final class WhitespaceTokenizer extends CharTokenizer { /** - * Construct a new WhitespaceTokenizer. + * Construct a new WhitespaceTokenizer. * @param matchVersion Lucene version + * to match See {@link above} + * */ - public WhitespaceTokenizer() { + public WhitespaceTokenizer(Version matchVersion) { + super(matchVersion); } /** * Construct a new WhitespaceTokenizer using a given * {@link org.apache.lucene.util.AttributeFactory}. * + * @param + * matchVersion Lucene version to match See + * {@link above} * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public WhitespaceTokenizer(AttributeFactory factory) { - super(factory); + public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory) { + super(matchVersion, factory); } /** Collects only characters which do not satisfy diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java index 708996362a7..e23ee869665 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; +import java.io.Reader; import java.util.Map; /** @@ -36,6 +37,7 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { /** Creates a new WhitespaceTokenizerFactory */ public WhitespaceTokenizerFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -43,6 +45,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { @Override public WhitespaceTokenizer create(AttributeFactory factory) { - return new WhitespaceTokenizer(factory); + return new WhitespaceTokenizer(luceneMatchVersion, factory); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java index e8f49ef3616..b54739be60e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import java.io.*; import java.nio.charset.StandardCharsets; @@ -60,7 +61,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#"); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,30 +75,34 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}). 
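For reference, the whitespace pieces above compose like this from the caller's side; a sketch, with field name and sample text chosen arbitrarily:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class WhitespaceDemo {
      public static void main(String[] args) throws Exception {
        Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
        try (TokenStream ts = analyzer.tokenStream("body", "Foo  Bar\tbaz")) {
          CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
          ts.reset();
          while (ts.incrementToken()) {
            System.out.println(term); // Foo, Bar, baz: split on whitespace only, case kept
          }
          ts.end();
        }
      }
    }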
+ * + * @param matchVersion Lucene version to match */ - public CzechAnalyzer() { - this(DefaultSetHolder.DEFAULT_SET); + public CzechAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion Lucene version to match * @param stopwords a stopword set */ - public CzechAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words and a set of work to be * excluded from the {@link CzechStemFilter}. * + * @param matchVersion Lucene version to match * @param stopwords a stopword set * @param stemExclusionTable a stemming exclusion set */ - public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) { - super(stopwords); - this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable)); + public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) { + super(matchVersion, stopwords); + this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); } /** @@ -110,16 +115,16 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} * , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If * a stem exclusion set is provided via - * {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a + * {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a * {@link SetKeywordMarkerFilter} is added before * {@link CzechStemFilter}. */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter( matchVersion, result, stopwords); if(!this.stemExclusionTable.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionTable); result = new CzechStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java index 7f2720addc2..00f7520af9f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.DanishStemmer; /** @@ -63,7 +64,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // 
distribution (JAR) @@ -75,17 +76,18 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public DanishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public DanishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public DanishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,12 +95,14 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public DanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -115,10 +119,10 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new DanishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java index 1c61693dcc4..6cab61ea1f4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java @@ -69,7 +69,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -91,31 +91,35 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { * Builds an analyzer with the default stop words: * {@link #getDefaultStopSet()}. 
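The same three-constructor ladder (defaults, custom stopwords, stopwords plus stem exclusions) repeats across every language analyzer in this patch. A sketch using DanishAnalyzer; the stopwords are invented examples, not the bundled Danish list:

    import java.util.Arrays;

    import org.apache.lucene.analysis.da.DanishAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class DanishDemo {
      public static void main(String[] args) {
        // ignoreCase=true: entries and lookups are matched case-insensitively
        CharArraySet stops = new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList("og", "i", "det"), true);
        DanishAnalyzer analyzer = new DanishAnalyzer(Version.LUCENE_CURRENT, stops);
        System.out.println(analyzer.getStopwordSet().contains("OG")); // true
      }
    }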
*/ - public GermanAnalyzer() { - this(DefaultSetHolder.DEFAULT_SET); + public GermanAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public GermanAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet * a stemming exclusion set */ - public GermanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); } /** @@ -131,10 +135,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter( matchVersion, result, stopwords); result = new SetKeywordMarkerFilter(result, exclusionSet); result = new GermanNormalizationFilter(result); result = new GermanLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java index 06e1e5273ea..4f418ee183f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java @@ -69,9 +69,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words. + * @param matchVersion Lucene compatibility version */ - public GreekAnalyzer() { - this(DefaultSetHolder.DEFAULT_SET); + public GreekAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_SET); } /** @@ -80,10 +81,11 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { * NOTE: The stopwords set should be pre-processed with the logic of * {@link GreekLowerCaseFilter} for best results. 
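That normalization caveat is easiest to see on the filter itself. A sketch, with the input word chosen to show the final-sigma rule:

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class GreekLowerDemo {
      public static void main(String[] args) throws Exception {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT);
        tokenizer.setReader(new StringReader("ΟΔΟΣ"));
        TokenStream ts = new GreekLowerCaseFilter(Version.LUCENE_CURRENT, tokenizer);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // prints "οδοσ": lower-cased, with final sigma standardized to σ
          System.out.println(term);
        }
        ts.end();
        ts.close();
      }
    }

A stopword set that was not run through the same normalization would silently fail to match such tokens, hence the javadoc's advice.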
* + * @param matchVersion Lucene compatibility version * @param stopwords a stopword set */ - public GreekAnalyzer(CharArraySet stopwords) { - super(stopwords); + public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) { + super(matchVersion, stopwords); } /** @@ -98,10 +100,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new GreekLowerCaseFilter(source); - result = new StandardFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new GreekLowerCaseFilter(matchVersion, source); + result = new StandardFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); result = new GreekStemFilter(result); return new TokenStreamComponents(source, result); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java index 66d4aa6a602..ba0a20ac29e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java @@ -22,22 +22,32 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharacterUtils; +import org.apache.lucene.util.Version; /** * Normalizes token text to lower case, removes some Greek diacritics, * and standardizes final sigma to sigma. + * + *
You must specify the required {@link Version} + * compatibility when creating GreekLowerCaseFilter: + * <ul> + *   <li> As of 3.1, supplementary characters are properly lowercased. + * </ul>
*/ public final class GreekLowerCaseFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final CharacterUtils charUtils = CharacterUtils.getInstance(); + private final CharacterUtils charUtils; /** * Create a GreekLowerCaseFilter that normalizes Greek token text. * + * @param matchVersion Lucene compatibility version, + * See
above * @param in TokenStream to filter */ - public GreekLowerCaseFilter(TokenStream in) { + public GreekLowerCaseFilter(Version matchVersion, TokenStream in) { super(in); + this.charUtils = CharacterUtils.getInstance(matchVersion); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java index 5ff0c90f63e..15b6f9251c1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java @@ -40,6 +40,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M /** Creates a new GreekLowerCaseFilterFactory */ public GreekLowerCaseFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -47,7 +48,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M @Override public GreekLowerCaseFilter create(TokenStream in) { - return new GreekLowerCaseFilter(in); + return new GreekLowerCaseFilter(luceneMatchVersion, in); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java index 750bd3589ae..f714e54c3d0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java @@ -1,6 +1,7 @@ package org.apache.lucene.analysis.el; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; import java.util.Arrays; @@ -204,7 +205,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc4 = new CharArraySet( + private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"), false); @@ -230,7 +231,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc6 = new CharArraySet( + private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ", "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ", "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ", @@ -255,7 +256,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc7 = new CharArraySet( + private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ", "πεθ", "πικρ", "ποτ", "σιχ", "χ"), false); @@ -282,11 +283,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc8a = new CharArraySet( + private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("τρ", "τσ"), false); - private static final CharArraySet exc8b = new CharArraySet( + private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ", "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ", "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ", @@ -345,7 +346,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc9 = new 
CharArraySet( + private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ", "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ", "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), @@ -433,11 +434,11 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc12a = new CharArraySet( + private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"), false); - private static final CharArraySet exc12b = new CharArraySet( + private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"), false); @@ -457,7 +458,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc13 = new CharArraySet( + private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"), false); @@ -491,7 +492,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc14 = new CharArraySet( + private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ", "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ", "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε", @@ -529,7 +530,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc15a = new CharArraySet( + private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ", "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ", "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ", @@ -538,7 +539,7 @@ public class GreekStemmer { "ουλαμ", "ουρ", "π", "τρ", "μ"), false); - private static final CharArraySet exc15b = new CharArraySet( + private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ψοφ", "ναυλοχ"), false); @@ -575,7 +576,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc16 = new CharArraySet( + private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"), false); @@ -595,7 +596,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc17 = new CharArraySet( + private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"), false); @@ -609,7 +610,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc18 = new CharArraySet( + private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"), false); @@ -633,7 +634,7 @@ public class GreekStemmer { return len; } - private static final CharArraySet exc19 = new CharArraySet( + private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"), false); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java index 15bfb51a518..934540a3ec9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for English. @@ -56,17 +57,18 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. */ - public EnglishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public EnglishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public EnglishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -74,12 +76,14 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -97,11 +101,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new EnglishPossessiveFilter(result); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new EnglishPossessiveFilter(matchVersion, result); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PorterStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java index e4e03a1c1b3..9f6f21884d2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; /** * TokenFilter that removes possessives (trailing 's) from words. @@ -29,7 +30,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public final class EnglishPossessiveFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - public EnglishPossessiveFilter(TokenStream input) { + // NOTE: version now unused + public EnglishPossessiveFilter(Version version, TokenStream input) { super(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java index 40f1d30751d..f1685a7941b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java @@ -39,6 +39,7 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory { /** Creates a new EnglishPossessiveFilterFactory */ public EnglishPossessiveFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -46,6 +47,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory { @Override public TokenStream create(TokenStream input) { - return new EnglishPossessiveFilter(input); + return new EnglishPossessiveFilter(luceneMatchVersion, input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java index b4d68a5f797..cdb397b93bf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java @@ -64,6 +64,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder; *
Copyright: Copyright 2008, Lucid Imagination, Inc. * Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu)
*/ +import org.apache.lucene.util.Version; /** * This class implements the Kstem algorithm @@ -279,7 +280,7 @@ public class KStemmer { DictEntry defaultEntry; DictEntry entry; - CharArrayMap d = new CharArrayMap<>(1000, false); + CharArrayMap d = new CharArrayMap<>(Version.LUCENE_CURRENT, 1000, false); for (int i = 0; i < exceptionWords.length; i++) { if (!d.containsKey(exceptionWords[i])) { entry = new DictEntry(exceptionWords[i], true); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java index 3c2812bbd2d..2ce1965af61 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Spanish. @@ -62,7 +63,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,17 +75,18 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public SpanishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public SpanishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public SpanishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -92,12 +94,14 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
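CharArrayMap gets the same versioned constructor as CharArraySet, as the KStemmer hunk above shows. A sketch with invented keys and values:

    import org.apache.lucene.analysis.util.CharArrayMap;
    import org.apache.lucene.util.Version;

    public class CharArrayMapDemo {
      public static void main(String[] args) {
        // ignoreCase=false: keys match exactly
        CharArrayMap<Integer> counts = new CharArrayMap<>(Version.LUCENE_CURRENT, 16, false);
        counts.put("foo", 42);
        char[] buffer = {'x', 'f', 'o', 'o', 'y'};
        // lookups can run against a char[] slice with no String allocation
        System.out.println(counts.get(buffer, 1, 3)); // 42
      }
    }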
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -114,10 +118,10 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SpanishLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java index 4222e5a0998..12bb7a3ef5d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.BasqueStemmer; /** @@ -72,17 +73,18 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public BasqueAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public BasqueAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public BasqueAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -90,12 +92,14 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
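The stem-exclusion constructor keeps chosen terms away from the stemmer via SetKeywordMarkerFilter. A sketch using SpanishAnalyzer; the protected term is an arbitrary example:

    import java.util.Arrays;

    import org.apache.lucene.analysis.es.SpanishAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class StemExclusionDemo {
      public static void main(String[] args) {
        CharArraySet noStem = new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList("lucene"), false);
        // "lucene" gets the keyword flag and passes the stemmer untouched
        SpanishAnalyzer analyzer = new SpanishAnalyzer(Version.LUCENE_CURRENT,
            SpanishAnalyzer.getDefaultStopSet(), noStem);
        System.out.println(analyzer.getStopwordSet().isEmpty()); // false
      }
    }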
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -112,10 +116,10 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new BasqueStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java index df9c2fb5c96..1f1b4b2bdb0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Persian. @@ -86,18 +87,20 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. */ - public PersianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public PersianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public PersianAnalyzer(CharArraySet stopwords){ - super(stopwords); + public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){ + super(matchVersion, stopwords); } /** @@ -112,8 +115,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new LowerCaseFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new LowerCaseFilter(matchVersion, source); result = new ArabicNormalizationFilter(result); /* additional persian-specific normalization */ result = new PersianNormalizationFilter(result); @@ -121,7 +124,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { * the order here is important: the stopword list is normalized with the * above! 
*/ - return new TokenStreamComponents(source, new StopFilter(result, stopwords)); + return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords)); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java index 84a3c4ffd5f..5f824429772 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.FinnishStemmer; /** @@ -63,7 +64,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,17 +76,18 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public FinnishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public FinnishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public FinnishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,12 +95,14 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
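WordlistLoader.getSnowballWordSet() now takes the version as well. A sketch with an inline snowball-format list (contents invented); in snowball stopword files, '|' begins a comment:

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.analysis.util.WordlistLoader;
    import org.apache.lucene.util.Version;

    public class WordlistDemo {
      public static void main(String[] args) throws IOException {
        String words = "ja  | and\nei  | not\n";
        CharArraySet set = WordlistLoader.getSnowballWordSet(
            new StringReader(words), Version.LUCENE_CURRENT);
        System.out.println(set.contains("ja")); // true
      }
    }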
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -115,10 +119,10 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new FinnishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java index 5939ef8fa98..b86fb80cb86 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java @@ -59,7 +59,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { /** Default set of articles for ElisionFilter */ public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet(Arrays.asList( + new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true)); /** @@ -80,7 +80,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -92,33 +92,37 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}). 
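DEFAULT_ARTICLES above is the versioned CharArraySet pattern in the wild; the same kind of set drives ElisionFilter directly. A sketch with an invented article list and input:

    import java.io.StringReader;
    import java.util.Arrays;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.analysis.util.ElisionFilter;
    import org.apache.lucene.util.Version;

    public class ElisionDemo {
      public static void main(String[] args) throws Exception {
        CharArraySet articles = new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList("l", "d"), true);
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT);
        tokenizer.setReader(new StringReader("l'avion"));
        TokenStream ts = new ElisionFilter(tokenizer, articles);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term); // avion
        }
        ts.end();
        ts.close();
      }
    }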
*/ - public FrenchAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public FrenchAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public FrenchAnalyzer(CharArraySet stopwords){ - this(stopwords, CharArraySet.EMPTY_SET); + public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){ + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set * @param stemExclutionSet * a stemming exclusion set */ - public FrenchAnalyzer(CharArraySet stopwords, + public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet) { - super(stopwords); + super(matchVersion, stopwords); this.excltable = CharArraySet.unmodifiableSet(CharArraySet - .copy(stemExclutionSet)); + .copy(matchVersion, stemExclutionSet)); } /** @@ -135,11 +139,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); result = new ElisionFilter(result, DEFAULT_ARTICLES); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!excltable.isEmpty()) result = new SetKeywordMarkerFilter(result, excltable); result = new FrenchLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java index 00413d55cf1..089e123845b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.IrishStemmer; /** @@ -44,7 +45,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet( + new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "d", "m", "b" ), true)); @@ -55,7 +56,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { * with phrase queries versus tAthair (which would not have a gap). */ private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet( - new CharArraySet( + new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "h", "n", "t" ), true)); @@ -90,17 +91,18 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
*/ - public IrishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public IrishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public IrishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -108,12 +110,14 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -130,12 +134,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new StopFilter(result, HYPHENATIONS); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new StopFilter(matchVersion, result, HYPHENATIONS); result = new ElisionFilter(result, DEFAULT_ARTICLES); result = new IrishLowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new IrishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java index b79245ba15e..a40276ff6de 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Galician. @@ -61,7 +62,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -73,17 +74,18 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
*/ - public GalicianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public GalicianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public GalicianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -91,12 +93,14 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -113,10 +117,10 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new GalicianStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java index 4ee31f13ddb..1edd0e8030e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.in.IndicNormalizationFilter; +import org.apache.lucene.util.Version; /** * Analyzer for Hindi. 
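Every createComponents() in this patch follows one shape: hold the matchVersion passed to the constructor and thread it through each versioned component. An application analyzer written against the restored API would look roughly like this (MyAnalyzer and its particular chain are invented, not code from this patch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public final class MyAnalyzer extends Analyzer {
      private final Version matchVersion;
      private final CharArraySet stopwords;

      public MyAnalyzer(Version matchVersion, CharArraySet stopwords) {
        this.matchVersion = matchVersion;
        this.stopwords = stopwords;
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // Each versioned component receives the same matchVersion.
        Tokenizer source = new StandardTokenizer(matchVersion);
        TokenStream result = new LowerCaseFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stopwords);
        return new TokenStreamComponents(source, result);
      }
    }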
@@ -74,29 +75,32 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the given stop words * + * @param version lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a stemming exclusion set */ - public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(version, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet( + CharArraySet.copy(matchVersion, stemExclusionSet)); } /** * Builds an analyzer with the given stop words * + * @param version lucene compatibility version * @param stopwords a stopword set */ - public HindiAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public HindiAnalyzer(Version version, CharArraySet stopwords) { + this(version, stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. */ - public HindiAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public HindiAnalyzer(Version version) { + this(version, DefaultSetHolder.DEFAULT_STOP_SET); } /** @@ -113,13 +117,13 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new LowerCaseFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new LowerCaseFilter(matchVersion, source); if (!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new IndicNormalizationFilter(result); result = new HindiNormalizationFilter(result); - result = new StopFilter(result, stopwords); + result = new StopFilter(matchVersion, result, stopwords); result = new HindiStemFilter(result); return new TokenStreamComponents(source, result); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java index 8784e3bbb31..d2addb81747 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.HungarianStemmer; /** @@ -63,7 +64,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,17 +76,18 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
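One note on the HindiAnalyzer hunk above: the constructor parameter is named version, yet the body calls CharArraySet.copy(matchVersion, ...). That compiles because super(version, stopwords) stores the value in the protected matchVersion field of StopwordAnalyzerBase. A simplified sketch of that base-class contract (not the verbatim source):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public abstract class StopwordAnalyzerBase extends Analyzer {
  protected final CharArraySet stopwords;
  protected final Version matchVersion;  // set once here, read by subclasses

  protected StopwordAnalyzerBase(Version version, CharArraySet stopwords) {
    this.matchVersion = version;
    this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET
        : CharArraySet.unmodifiableSet(CharArraySet.copy(version, stopwords));
  }
}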
*/ - public HungarianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public HungarianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public HungarianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,12 +95,14 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -115,10 +119,10 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new HungarianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index aa657fdb309..ef4b26d2212 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -215,7 +215,7 @@ final class Stemmer { if (stems.size() < 2) { return stems; } - CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase); + CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase); List deduped = new ArrayList<>(); for (CharsRef s : stems) { if (!terms.contains(s)) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java index ae22c47d8b4..0f5065954ad 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; import 
org.tartarus.snowball.ext.ArmenianStemmer; /** @@ -72,17 +73,18 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public ArmenianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public ArmenianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public ArmenianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -90,12 +92,14 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -112,10 +116,10 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new ArmenianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java index d54b3609597..85bd081e7a4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; +import org.apache.lucene.util.Version; /** * Analyzer for Indonesian (Bahasa) @@ -68,18 +69,20 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
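The CharArraySet.copy(matchVersion, ...) idiom repeated in these constructors takes a defensive, version-aware snapshot, so later mutation of the caller's set cannot change the analyzer. A small illustration (the set contents are hypothetical):

import java.util.Arrays;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

CharArraySet exclusions = new CharArraySet(Version.LUCENE_4_9,
    Arrays.asList("hayastan"), false);            // caller-owned, mutable
CharArraySet frozen = CharArraySet.unmodifiableSet(
    CharArraySet.copy(Version.LUCENE_4_9, exclusions));
exclusions.add("yerevan");                        // only the original changes
assert !frozen.contains("yerevan");               // the snapshot is unaffected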
*/ - public IndonesianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public IndonesianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public IndonesianAnalyzer(CharArraySet stopwords){ - this(stopwords, CharArraySet.EMPTY_SET); + public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){ + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -87,14 +90,17 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * {@link IndonesianStemFilter}. * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet * a set of terms not to be stemmed */ - public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){ - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){ + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -110,10 +116,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if (!stemExclusionSet.isEmpty()) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java index afae44def4c..382bfaef9c8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java @@ -36,6 +36,7 @@ import org.apache.lucene.analysis.util.ElisionFilter; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Italian. 
@@ -47,7 +48,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt"; private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( - new CharArraySet( + new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" @@ -71,7 +72,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -83,17 +84,18 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public ItalianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public ItalianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public ItalianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -101,12 +103,14 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
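The ItalianAnalyzer hunk above pins its internal DEFAULT_ARTICLES set to Version.LUCENE_CURRENT, which is safe for a private constant because it never mixes with user-supplied, version-dependent data. The pattern, abridged here to a few articles for brevity:

import java.util.Arrays;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// ignoreCase=true: elided articles match regardless of capitalization.
private static final CharArraySet DEFAULT_ARTICLES =
    CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
        Arrays.asList("c", "l", "un", "m", "t", "s", "v", "d"), true));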
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -123,11 +127,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); result = new ElisionFilter(result, DEFAULT_ARTICLES); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new ItalianLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java index 0d858428cac..c6b80ed756b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Latvian. @@ -61,7 +62,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -73,17 +74,18 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public LatvianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public LatvianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public LatvianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -91,12 +93,14 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -113,10 +117,10 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new LatvianStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java index 986994e5121..4c9743caf2b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java @@ -82,7 +82,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory { boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false); Set k = getSet(args, KEEP); if (k != null) { - keep = new CharArraySet(10, ignoreCase); + keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase); keep.addAll(k); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java index 8e1726fb5ce..5f501e06cb2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; /** * Removes words that are too long or too short from the stream. @@ -38,12 +39,13 @@ public final class CodepointCountFilter extends FilteringTokenFilter { * Create a new {@link CodepointCountFilter}. This will filter out tokens whose * {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)} * < min) or too long ({@link Character#codePointCount(char[], int, int)} > max). 
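The distinction CodepointCountFilter draws, code points rather than Java chars, matters for supplementary characters, which occupy two chars but a single code point:

// 'a' followed by one emoji encoded as a UTF-16 surrogate pair.
String s = "a\uD83D\uDE00";
int chars = s.length();                           // 3 (UTF-16 code units)
int codePoints = s.codePointCount(0, s.length()); // 2 (what this filter counts)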
+ * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param min the minimum length * @param max the maximum length */ - public CodepointCountFilter(TokenStream in, int min, int max) { - super(in); + public CodepointCountFilter(Version version, TokenStream in, int min, int max) { + super(version, in); if (min < 0) { throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java index d42d7f6452f..54250641d32 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java @@ -50,6 +50,6 @@ public class CodepointCountFilterFactory extends TokenFilterFactory { @Override public CodepointCountFilter create(TokenStream input) { - return new CodepointCountFilter(input, min, max); + return new CodepointCountFilter(luceneMatchVersion, input, min, max); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java index 093d22b60b9..c77e3a7614b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; /** * A TokenFilter that only keeps tokens with text contained in the @@ -36,11 +37,12 @@ public final class KeepWordFilter extends FilteringTokenFilter { * Create a new {@link KeepWordFilter}. *
<p><b>
NOTE: The words set passed to this constructor will be directly * used by this filter and should not be modified. + * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param words the words to keep */ - public KeepWordFilter(TokenStream in, CharArraySet words) { - super(in); + public KeepWordFilter(Version version, TokenStream in, CharArraySet words) { + super(version, in); this.words = words; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java index 7d4c24e0968..78c831bc8ce 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java @@ -44,6 +44,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc /** Creates a new KeepWordFilterFactory */ public KeepWordFilterFactory(Map args) { super(args); + assureMatchVersion(); wordFiles = get(args, "words"); ignoreCase = getBoolean(args, "ignoreCase", false); if (!args.isEmpty()) { @@ -72,7 +73,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc if (words == null) { return input; } else { - final TokenStream filter = new KeepWordFilter(input, words); + final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java index bd7e2232023..f35afc68b6f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.FilteringTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; /** * Removes words that are too long or too short from the stream. @@ -38,12 +39,13 @@ public final class LengthFilter extends FilteringTokenFilter { * Create a new {@link LengthFilter}. This will filter out tokens whose * {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()} * < min) or too long ({@link CharTermAttribute#length()} > max). 
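A minimal usage sketch of the restored LengthFilter signature (the tokenizer choice and version constant are illustrative, not mandated by the patch):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.util.Version;

// Keep only tokens between 3 and 10 characters long.
TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_4_9,
    new StringReader("a quick brownish fox"));
ts = new LengthFilter(Version.LUCENE_4_9, ts, 3, 10);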
+ * @param version the Lucene match version * @param in the {@link TokenStream} to consume * @param min the minimum length * @param max the maximum length */ - public LengthFilter(TokenStream in, int min, int max) { - super(in); + public LengthFilter(Version version, TokenStream in, int min, int max) { + super(version, in); if (min < 0) { throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java index 476f37543ea..6d63623e0fb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java @@ -50,7 +50,7 @@ public class LengthFilterFactory extends TokenFilterFactory { @Override public LengthFilter create(TokenStream input) { - final LengthFilter filter = new LengthFilter(input,min,max); + final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java index a1785abcf73..e3c7a033bdb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; import java.io.IOException; @@ -33,7 +34,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); - private final CharArraySet previous = new CharArraySet(8, false); + // use a fixed version, as we don't care about case sensitivity. + private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false); /** * Creates a new RemoveDuplicatesTokenFilter diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java index 20803202c77..6dadf820933 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java @@ -20,11 +20,15 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.Version; import java.io.IOException; /** * Trims leading and trailing whitespace from Tokens in the stream. + *
<p>
As of Lucene 4.4, this filter does not support updateOffsets=true anymore + * as it can lead to broken token streams. */ public final class TrimFilter extends TokenFilter { @@ -32,9 +36,10 @@ public final class TrimFilter extends TokenFilter { /** * Create a new {@link TrimFilter}. + * @param version the Lucene match version * @param in the stream to consume */ - public TrimFilter(TokenStream in) { + public TrimFilter(Version version, TokenStream in) { super(in); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java index 58c400b9bd6..c21233119cd 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java @@ -47,7 +47,7 @@ public class TrimFilterFactory extends TokenFilterFactory { @Override public TrimFilter create(TokenStream input) { - final TrimFilter filter = new TrimFilter(input); + final TrimFilter filter = new TrimFilter(luceneMatchVersion, input); return filter; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java index 62034031f1b..20fda83c449 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java @@ -80,7 +80,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter { } this.charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? CharacterUtils.getInstance() + ? CharacterUtils.getInstance(version) : CharacterUtils.getJava4Instance(); this.minGram = minGram; this.maxGram = maxGram; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java index b20f598cd70..ba87146a44f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java @@ -81,10 +81,10 @@ public final class NGramTokenFilter extends TokenFilter { * @param maxGram the largest n-gram to generate */ public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) { - super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE)); + super(new CodepointCountFilter(version, input, minGram, Integer.MAX_VALUE)); this.version = version; this.charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? CharacterUtils.getInstance() + ? CharacterUtils.getInstance(version) : CharacterUtils.getJava4Instance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java index 14e2a3b162e..72c943b1ef9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java @@ -121,7 +121,7 @@ public class NGramTokenizer extends Tokenizer { throw new IllegalArgumentException("This class only works with Lucene 4.4+. 
To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer"); } charUtils = version.onOrAfter(Version.LUCENE_4_4) - ? CharacterUtils.getInstance() + ? CharacterUtils.getInstance(version) : CharacterUtils.getJava4Instance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java index e3b2389f542..1f29184429d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java @@ -28,11 +28,13 @@ import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -48,8 +50,10 @@ import java.nio.charset.StandardCharsets; * A default set of stopwords is used unless an alternative list is specified, but the * exclusion list is empty by default. *
</p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
*/ -// TODO: extend StopwordAnalyzerBase public final class DutchAnalyzer extends Analyzer { /** File containing default Dutch stopwords. */ @@ -69,14 +73,14 @@ public final class DutchAnalyzer extends Analyzer { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) throw new RuntimeException("Unable to load default stopword set"); } - DEFAULT_STEM_DICT = new CharArrayMap<>(4, false); + DEFAULT_STEM_DICT = new CharArrayMap<>(Version.LUCENE_CURRENT, 4, false); DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet DEFAULT_STEM_DICT.put("ei", "eier"); @@ -96,27 +100,29 @@ public final class DutchAnalyzer extends Analyzer { private CharArraySet excltable = CharArraySet.EMPTY_SET; private final StemmerOverrideMap stemdict; + private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}) * and a few default entries for the stem exclusion table. * */ - public DutchAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(CharArraySet stopwords){ - this(stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){ + this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable){ - this(stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT); + public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){ + this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT); } - public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap stemOverrideDict) { - this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords)); - this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable)); + public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap stemOverrideDict) { + this.matchVersion = matchVersion; + this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords)); + this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); if (stemOverrideDict.isEmpty()) { this.stemdict = null; } else { @@ -148,10 +154,10 @@ public final class DutchAnalyzer extends Analyzer { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stoptable); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stoptable); if (!excltable.isEmpty()) result 
= new SetKeywordMarkerFilter(result, excltable); if (stemdict != null) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java index 0dd81255964..ffe519947d0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.NorwegianStemmer; /** @@ -63,7 +64,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,17 +76,18 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public NorwegianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public NorwegianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public NorwegianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,12 +95,14 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
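For the DutchAnalyzer hunks above, a caller-side sketch of supplying a custom stem override dictionary through the restored four-argument constructor (the entries mirror DEFAULT_STEM_DICT; the version constant is illustrative):

import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

CharArrayMap<String> stemDict = new CharArrayMap<>(Version.LUCENE_4_9, 2, false);
stemDict.put("fiets", "fiets");         // otherwise stems to "fiet"
stemDict.put("bromfiets", "bromfiets"); // otherwise stems to "bromfiet"
DutchAnalyzer analyzer = new DutchAnalyzer(Version.LUCENE_4_9,
    DutchAnalyzer.getDefaultStopSet(), CharArraySet.EMPTY_SET, stemDict);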
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -115,10 +119,10 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new NorwegianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java index fde61d6fa8c..3bceb5c6ab3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Portuguese. @@ -62,7 +63,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,17 +75,18 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public PortugueseAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public PortugueseAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public PortugueseAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -92,12 +94,14 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public PortugueseAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -114,10 +118,10 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PortugueseLightStemFilter(result); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java index 54ecdff15f2..f8ad153cfb4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java @@ -31,6 +31,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; import static org.apache.lucene.analysis.util.StemmerUtil.*; @@ -134,7 +135,8 @@ public abstract class RSLPStemmerBase { if (!exceptions[i].endsWith(suffix)) throw new RuntimeException("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'"); } - this.exceptions = new CharArraySet(Arrays.asList(exceptions), false); + this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, + Arrays.asList(exceptions), false); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java index 995ae2db893..8a4b8aa52ad 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util.Version; /** * An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection @@ -49,20 +50,23 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { //The default maximum percentage (40%) of index documents which //can contain a term, after which the term is considered to be a stop word. 
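QueryAutoStopWordAnalyzer derives its stop words from the index itself, so each restored constructor takes both the match version and a reader. A usage sketch under assumed setup (directory handling elided; the 0.3 threshold is illustrative):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

Analyzer buildQueryAnalyzer(Directory dir) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  // Treat any term appearing in more than 30% of documents as a stopword.
  return new QueryAutoStopWordAnalyzer(Version.LUCENE_4_9,
      new StandardAnalyzer(Version.LUCENE_4_9), reader, 0.3f);
}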
public static final float defaultMaxDocFreqPercent = 0.4f; + private final Version matchVersion; /** * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all * indexed fields from terms with a document frequency percentage greater than * {@link #defaultMaxDocFreqPercent} * + * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( + Version matchVersion, Analyzer delegate, IndexReader indexReader) throws IOException { - this(delegate, indexReader, defaultMaxDocFreqPercent); + this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent); } /** @@ -70,16 +74,18 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * indexed fields from terms with a document frequency greater than the given * maxDocFreq * + * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param maxDocFreq Document frequency terms should be above in order to be stopwords * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( + Version matchVersion, Analyzer delegate, IndexReader indexReader, int maxDocFreq) throws IOException { - this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq); + this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq); } /** @@ -87,6 +93,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * indexed fields from terms with a document frequency percentage greater than * the given maxPercentDocs * + * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which @@ -94,10 +101,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( + Version matchVersion, Analyzer delegate, IndexReader indexReader, float maxPercentDocs) throws IOException { - this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs); + this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs); } /** @@ -105,6 +113,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * given selection of fields from terms with a document frequency percentage * greater than the given maxPercentDocs * + * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param fields Selection of fields to calculate stopwords for @@ -113,11 +122,12 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( + Version matchVersion, Analyzer delegate, IndexReader indexReader, Collection fields, float maxPercentDocs) throws IOException { - this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs)); + 
this(matchVersion, delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs)); } /** @@ -125,6 +135,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * given selection of fields from terms with a document frequency greater than * the given maxDocFreq * + * @param matchVersion Version to be used in {@link StopFilter} * @param delegate Analyzer whose TokenStream will be filtered * @param indexReader IndexReader to identify the stopwords from * @param fields Selection of fields to calculate stopwords for @@ -132,11 +143,13 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { * @throws IOException Can be thrown while reading from the IndexReader */ public QueryAutoStopWordAnalyzer( + Version matchVersion, Analyzer delegate, IndexReader indexReader, Collection fields, int maxDocFreq) throws IOException { super(delegate.getReuseStrategy()); + this.matchVersion = matchVersion; this.delegate = delegate; for (String field : fields) { @@ -168,8 +181,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { if (stopWords == null) { return components; } - StopFilter stopFilter = new StopFilter(components.getTokenStream(), - new CharArraySet(stopWords, false)); + StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), + new CharArraySet(matchVersion, stopWords, false)); return new TokenStreamComponents(components.getTokenizer(), stopFilter); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java index c9dee414442..e729786cfe7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.reverse; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; import java.io.IOException; @@ -35,6 +36,7 @@ public final class ReverseStringFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final char marker; + private final Version matchVersion; private static final char NOMARKER = '\uFFFF'; /** @@ -64,10 +66,11 @@ public final class ReverseStringFilter extends TokenFilter { * The reversed tokens will not be marked. *
</p>
* + * @param matchVersion Lucene compatibility version * @param in {@link TokenStream} to filter */ - public ReverseStringFilter(TokenStream in) { - this(in, NOMARKER); + public ReverseStringFilter(Version matchVersion, TokenStream in) { + this(matchVersion, in, NOMARKER); } /** @@ -78,11 +81,13 @@ public final class ReverseStringFilter extends TokenFilter { * character. *
</p>
* + * @param matchVersion compatibility version * @param in {@link TokenStream} to filter * @param marker A character used to mark reversed tokens */ - public ReverseStringFilter(TokenStream in, char marker) { + public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) { super(in); + this.matchVersion = matchVersion; this.marker = marker; } @@ -95,7 +100,7 @@ public final class ReverseStringFilter extends TokenFilter { termAtt.resizeBuffer(len); termAtt.buffer()[len - 1] = marker; } - reverse( termAtt.buffer(), 0, len ); + reverse( matchVersion, termAtt.buffer(), 0, len ); termAtt.setLength(len); return true; } else { @@ -106,43 +111,48 @@ public final class ReverseStringFilter extends TokenFilter { /** * Reverses the given input string * + * @param matchVersion compatibility version * @param input the string to reverse * @return the given input string in reversed order */ - public static String reverse(final String input ){ + public static String reverse( Version matchVersion, final String input ){ final char[] charInput = input.toCharArray(); - reverse( charInput, 0, charInput.length ); + reverse( matchVersion, charInput, 0, charInput.length ); return new String( charInput ); } /** * Reverses the given input buffer in-place + * @param matchVersion compatibility version * @param buffer the input char array to reverse */ - public static void reverse(final char[] buffer) { - reverse(buffer, 0, buffer.length); + public static void reverse(Version matchVersion, final char[] buffer) { + reverse(matchVersion, buffer, 0, buffer.length); } /** * Partially reverses the given input buffer in-place from offset 0 * up to the given length. + * @param matchVersion compatibility version * @param buffer the input char array to reverse * @param len the length in the buffer up to where the * buffer should be reversed */ - public static void reverse(final char[] buffer, final int len) { - reverse( buffer, 0, len ); + public static void reverse(Version matchVersion, final char[] buffer, + final int len) { + reverse( matchVersion, buffer, 0, len ); } /** * Partially reverses the given input buffer in-place from the given offset * up to the given length. 
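A short sketch of the restored static reverse overloads, including the offset form whose signature completes just below (the version constant is illustrative):

import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.util.Version;

String r = ReverseStringFilter.reverse(Version.LUCENE_4_9, "lucene"); // "enecul"
char[] buf = "abcdef".toCharArray();
// Reverse three chars starting at offset 2: "abcdef" -> "abedcf".
ReverseStringFilter.reverse(Version.LUCENE_4_9, buf, 2, 3);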
+ * @param matchVersion compatibility version * @param buffer the input char array to reverse * @param start the offset from where to reverse the buffer * @param len the length in the buffer up to where the * buffer should be reversed */ - public static void reverse(final char[] buffer, + public static void reverse(Version matchVersion, final char[] buffer, final int start, final int len) { /* modified version of Apache Harmony AbstractStringBuilder reverse0() */ if (len < 2) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java index 33cfc97fb5d..f25831ad198 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java @@ -40,6 +40,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory { /** Creates a new ReverseStringFilterFactory */ public ReverseStringFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -47,7 +48,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory { @Override public ReverseStringFilter create(TokenStream in) { - return new ReverseStringFilter(in); + return new ReverseStringFilter(luceneMatchVersion,in); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java index b59943d3ba8..22af94ec177 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java @@ -78,17 +78,18 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */ - public RomanianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public RomanianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public RomanianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -96,12 +97,14 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. 
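The ReverseStringFilterFactory hunk above adds assureMatchVersion(), so a missing luceneMatchVersion now fails fast at configuration time. A hypothetical wiring sketch (the args map normally comes from schema configuration rather than hand-built code):

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;

Map<String, String> args = new HashMap<>();
args.put("luceneMatchVersion", "4.9");   // consumed by the factory base class
ReverseStringFilterFactory factory = new ReverseStringFilterFactory(args);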
* + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -118,10 +121,10 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new RomanianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java index 869dce77781..69ab96fa679 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java @@ -54,7 +54,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -74,30 +74,34 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { return DefaultSetHolder.DEFAULT_STOP_SET; } - public RussianAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public RussianAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set */ - public RussianAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public RussianAnalyzer(Version matchVersion, CharArraySet stopwords){ + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** * Builds an analyzer with the given stop words * + * @param matchVersion + * lucene compatibility version * @param stopwords * a stopword set * @param stemExclusionSet a set of words not to be stemmed */ - public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){ + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet)); } /** @@ -113,10 +117,10 @@ 
public final class RussianAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if (!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java index cd2e3353f38..9f7cf319012 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.shingle; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.util.Version; /** * A ShingleAnalyzerWrapper wraps a {@link ShingleFilter} around another {@link Analyzer}. @@ -100,15 +101,15 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper { /** * Wraps {@link StandardAnalyzer}. */ - public ShingleAnalyzerWrapper() { - this(ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); + public ShingleAnalyzerWrapper(Version matchVersion) { + this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); } /** * Wraps {@link StandardAnalyzer}. */ - public ShingleAnalyzerWrapper(int minShingleSize, int maxShingleSize) { - this(new StandardAnalyzer(), minShingleSize, maxShingleSize); + public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) { + this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java index 9663bfacaed..f7927161726 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java @@ -17,14 +17,16 @@ package org.apache.lucene.analysis.standard; * limitations under the License. 
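A usage sketch of the restored version-taking analyzer constructors above (illustrative only; the stop word "и" is an arbitrary example):

    import org.apache.lucene.analysis.ru.RussianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    // Default Snowball stop set:
    RussianAnalyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);

    // Custom stop words, empty stem-exclusion set:
    CharArraySet stop = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
    stop.add("и");
    RussianAnalyzer b = new RussianAnalyzer(Version.LUCENE_CURRENT, stop, CharArraySet.EMPTY_SET);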
*/ -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; +import org.apache.lucene.util.Version; +import java.io.File; import java.io.IOException; import java.io.Reader; @@ -32,6 +34,18 @@ import java.io.Reader; * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link * LowerCaseFilter} and {@link StopFilter}, using a list of * English stop words. + * + * + *
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ClassicAnalyzer:
+ *
* * ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation, @@ -49,23 +63,29 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. + * @param matchVersion Lucene version to match See {@link + * above} * @param stopWords stop words */ - public ClassicAnalyzer(CharArraySet stopWords) { - super(stopWords); + public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) { + super(matchVersion, stopWords); } /** Builds an analyzer with the default stop words ({@link * #STOP_WORDS_SET}). + * @param matchVersion Lucene version to match See {@link + * above} */ - public ClassicAnalyzer() { - this(STOP_WORDS_SET); + public ClassicAnalyzer(Version matchVersion) { + this(matchVersion, STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader) + * @see WordlistLoader#getWordSet(Reader, Version) + * @param matchVersion Lucene version to match See {@link + * above} * @param stopwords Reader to read stop words from */ - public ClassicAnalyzer(Reader stopwords) throws IOException { - this(loadStopwordSet(stopwords)); + public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException { + this(matchVersion, loadStopwordSet(stopwords, matchVersion)); } /** @@ -87,11 +107,11 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final ClassicTokenizer src = new ClassicTokenizer(); + final ClassicTokenizer src = new ClassicTokenizer(matchVersion); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new ClassicFilter(src); - tok = new LowerCaseFilter(tok); - tok = new StopFilter(tok, stopwords); + tok = new LowerCaseFilter(matchVersion, tok); + tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java index 118a41cb8b0..eb085894788 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis.standard; import java.io.IOException; +import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -25,6 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** A grammar-based tokenizer constructed with JFlex * @@ -99,19 +102,19 @@ public final class ClassicTokenizer extends Tokenizer { * * See http://issues.apache.org/jira/browse/LUCENE-1068 */ - public ClassicTokenizer() { - init(); + public ClassicTokenizer(Version matchVersion) { + init(matchVersion); } /** * Creates a new ClassicTokenizer with a given {@link 
org.apache.lucene.util.AttributeFactory} */ - public ClassicTokenizer(AttributeFactory factory) { + public ClassicTokenizer(Version matchVersion, AttributeFactory factory) { super(factory); - init(); + init(matchVersion); } - private void init() { + private void init(Version matchVersion) { this.scanner = new ClassicTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java index e4d901b82ba..3d73bd7d506 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java @@ -37,6 +37,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory { /** Creates a new ClassicTokenizerFactory */ public ClassicTokenizerFactory(Map args) { super(args); + assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -45,7 +46,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory { @Override public ClassicTokenizer create(AttributeFactory factory) { - ClassicTokenizer tokenizer = new ClassicTokenizer(factory); + ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java index db9c4719dc0..00604afc17e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java @@ -17,14 +17,16 @@ package org.apache.lucene.analysis.standard; * limitations under the License. */ -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; +import org.apache.lucene.util.Version; +import java.io.File; import java.io.IOException; import java.io.Reader; @@ -32,9 +34,26 @@ import java.io.Reader; * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link * LowerCaseFilter} and {@link StopFilter}, using a list of * English stop words. + * + * + *
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StandardAnalyzer:
+ *
*/ public final class StandardAnalyzer extends StopwordAnalyzerBase { - + /** Default maximum allowed token length */ public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; @@ -45,22 +64,29 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. + * @param matchVersion Lucene version to match See {@link + * above} * @param stopWords stop words */ - public StandardAnalyzer(CharArraySet stopWords) { - super(stopWords); + public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) { + super(matchVersion, stopWords); } - /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). + /** Builds an analyzer with the default stop words ({@link + * #STOP_WORDS_SET}). + * @param matchVersion Lucene version to match See {@link + * above} */ - public StandardAnalyzer() { - this(STOP_WORDS_SET); + public StandardAnalyzer(Version matchVersion) { + this(matchVersion, STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see WordlistLoader#getWordSet(Reader) + * @see WordlistLoader#getWordSet(Reader, Version) + * @param matchVersion Lucene version to match See {@link + * above} * @param stopwords Reader to read stop words from */ - public StandardAnalyzer(Reader stopwords) throws IOException { - this(loadStopwordSet(stopwords)); + public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException { + this(matchVersion, loadStopwordSet(stopwords, matchVersion)); } /** @@ -82,11 +108,11 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final StandardTokenizer src = new StandardTokenizer(); + final StandardTokenizer src = new StandardTokenizer(matchVersion); src.setMaxTokenLength(maxTokenLength); - TokenStream tok = new StandardFilter(src); - tok = new LowerCaseFilter(tok); - tok = new StopFilter(tok, stopwords); + TokenStream tok = new StandardFilter(matchVersion, src); + tok = new LowerCaseFilter(matchVersion, tok); + tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java index ae5be75bc1e..809f9653dfe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java @@ -21,13 +21,14 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.Version; /** * Normalizes tokens extracted with {@link StandardTokenizer}. 
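A minimal sketch of constructing the version-aware StandardAnalyzer above (illustrative; the Reader-based constructor declares IOException):

    import java.io.StringReader;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    // Default English stop words:
    StandardAnalyzer std = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Stop words from a Reader, one word per line (throws IOException):
    StandardAnalyzer custom =
        new StandardAnalyzer(Version.LUCENE_CURRENT, new StringReader("the\nan\nof"));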
*/ public class StandardFilter extends TokenFilter { - public StandardFilter(TokenStream in) { + public StandardFilter(Version matchVersion, TokenStream in) { super(in); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java index f9102b00b44..f2dd7e0507f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java @@ -38,6 +38,7 @@ public class StandardFilterFactory extends TokenFilterFactory { /** Creates a new StandardFilterFactory */ public StandardFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -45,6 +46,6 @@ public class StandardFilterFactory extends TokenFilterFactory { @Override public StandardFilter create(TokenStream input) { - return new StandardFilter(input); + return new StandardFilter(luceneMatchVersion, input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index bcfb6f6f267..196c0ca1baf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis.standard; import java.io.IOException; +import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -25,6 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** A grammar-based tokenizer constructed with JFlex. *

@@ -113,19 +116,19 @@ public final class StandardTokenizer extends Tokenizer { * See http://issues.apache.org/jira/browse/LUCENE-1068 */ - public StandardTokenizer() { - init(); + public StandardTokenizer(Version matchVersion) { + init(matchVersion); } /** * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory} */ - public StandardTokenizer(AttributeFactory factory) { + public StandardTokenizer(Version matchVersion, AttributeFactory factory) { super(factory); - init(); + init(matchVersion); } - private void init() { + private void init(Version matchVersion) { this.scanner = new StandardTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java index 87709aa8622..bb5248b947b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java @@ -37,6 +37,7 @@ public class StandardTokenizerFactory extends TokenizerFactory { /** Creates a new StandardTokenizerFactory */ public StandardTokenizerFactory(Map args) { super(args); + assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -45,7 +46,7 @@ public class StandardTokenizerFactory extends TokenizerFactory { @Override public StandardTokenizer create(AttributeFactory factory) { - StandardTokenizer tokenizer = new StandardTokenizer(factory); + StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java index 951230e67ec..59cfbd16ec7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java @@ -34,9 +34,15 @@ import java.io.Reader; * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and * {@link org.apache.lucene.analysis.core.StopFilter}, using a list of * English stop words. + * + * + *
+ * <p>You must specify the required {@link org.apache.lucene.util.Version}
+ * compatibility when creating UAX29URLEmailAnalyzer
+ *
*/ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { - + /** Default maximum allowed token length */ public static final int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -47,23 +53,29 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; /** Builds an analyzer with the given stop words. + * @param matchVersion Lucene version to match See {@link + * above} * @param stopWords stop words */ - public UAX29URLEmailAnalyzer(CharArraySet stopWords) { - super(stopWords); + public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) { + super(matchVersion, stopWords); } /** Builds an analyzer with the default stop words ({@link * #STOP_WORDS_SET}). + * @param matchVersion Lucene version to match See {@link + * above} */ - public UAX29URLEmailAnalyzer() { - this(STOP_WORDS_SET); + public UAX29URLEmailAnalyzer(Version matchVersion) { + this(matchVersion, STOP_WORDS_SET); } /** Builds an analyzer with the stop words from the given reader. - * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader) + * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) + * @param matchVersion Lucene version to match See {@link + * above} * @param stopwords Reader to read stop words from */ - public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException { - this(loadStopwordSet(stopwords)); + public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) throws IOException { + this(matchVersion, loadStopwordSet(stopwords, matchVersion)); } /** @@ -85,11 +97,11 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStreamComponents createComponents(final String fieldName) { - final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(); + final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion); src.setMaxTokenLength(maxTokenLength); - TokenStream tok = new StandardFilter(src); - tok = new LowerCaseFilter(tok); - tok = new StopFilter(tok, stopwords); + TokenStream tok = new StandardFilter(matchVersion, src); + tok = new LowerCaseFilter(matchVersion, tok); + tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index 522276b5b5f..cd1218d8da7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -18,6 +18,9 @@ package org.apache.lucene.analysis.standard; */ import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -25,6 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; +import 
org.apache.lucene.util.Version; /** * This class implements Word Break rules from the Unicode Text Segmentation @@ -95,19 +100,19 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { * the input to the newly created JFlex scanner. */ - public UAX29URLEmailTokenizer() { - this.scanner = getScanner(); + public UAX29URLEmailTokenizer(Version matchVersion) { + this.scanner = getScannerFor(matchVersion); } /** * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory} */ - public UAX29URLEmailTokenizer(AttributeFactory factory) { + public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory) { super(factory); - this.scanner = getScanner(); + this.scanner = getScannerFor(matchVersion); } - private StandardTokenizerInterface getScanner() { + private StandardTokenizerInterface getScannerFor(Version matchVersion) { return new UAX29URLEmailTokenizerImpl(input); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java index 485b7d33a6e..e1218075aea 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java @@ -38,6 +38,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory { /** Creates a new UAX29URLEmailTokenizerFactory */ public UAX29URLEmailTokenizerFactory(Map args) { super(args); + assureMatchVersion(); maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); @@ -46,7 +47,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory { @Override public UAX29URLEmailTokenizer create(AttributeFactory factory) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(factory); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java index e47e7f8c55c..a8878ea2139 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.SwedishStemmer; /** @@ -63,7 +64,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8)); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) @@ -75,17 +76,18 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. 
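To see the analyzer above in action, a small consumption loop (a sketch; assumes Analyzer.tokenStream(String, String) is available and should be wrapped in a method that throws IOException; the stock English stop set drops "at"):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    UAX29URLEmailAnalyzer a = new UAX29URLEmailAnalyzer(Version.LUCENE_CURRENT);
    try (TokenStream ts = a.tokenStream("f", "mail me at foo@bar.com")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term); // mail, me, foo@bar.com
      }
      ts.end();
    }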
*/ - public SwedishAnalyzer() { - this(DefaultSetHolder.DEFAULT_STOP_SET); + public SwedishAnalyzer(Version matchVersion) { + this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET); } /** * Builds an analyzer with the given stop words. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set */ - public SwedishAnalyzer(CharArraySet stopwords) { - this(stopwords, CharArraySet.EMPTY_SET); + public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) { + this(matchVersion, stopwords, CharArraySet.EMPTY_SET); } /** @@ -93,12 +95,14 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * + * @param matchVersion lucene compatibility version * @param stopwords a stopword set * @param stemExclusionSet a set of terms not to be stemmed */ - public SwedishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -115,10 +119,10 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new StopFilter(result, stopwords); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new SwedishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index 95559767923..7fcbf471c56 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -134,8 +134,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create(); - TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer; + Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT) : factory.create(); + TokenStream stream = ignoreCase ? 
new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; @@ -202,12 +202,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException { Class clazz = loader.findClass(cname, Analyzer.class); try { - Analyzer analyzer = null; - try { - analyzer = clazz.getConstructor().newInstance(); - } catch (NoSuchMethodException e) { - analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT); - } + Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT); if (analyzer instanceof ResourceLoaderAware) { ((ResourceLoaderAware) analyzer).inform(loader); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java index 6b192eaa799..12e27ad2aff 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -34,7 +35,6 @@ import org.apache.lucene.util.Version; * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words. */ public final class ThaiAnalyzer extends StopwordAnalyzerBase { - private final Version matchVersion; /** File containing default Thai stopwords. 
*/ public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; @@ -87,8 +87,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase { * @param stopwords a stopword set */ public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) { - super(stopwords); - this.matchVersion = matchVersion; + super(matchVersion, stopwords); } /** @@ -105,15 +104,15 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase { protected TokenStreamComponents createComponents(String fieldName) { if (matchVersion.onOrAfter(Version.LUCENE_4_8)) { final Tokenizer source = new ThaiTokenizer(); - TokenStream result = new LowerCaseFilter(source); - result = new StopFilter(result, stopwords); + TokenStream result = new LowerCaseFilter(matchVersion, source); + result = new StopFilter(matchVersion, result, stopwords); return new TokenStreamComponents(source, result); } else { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); - result = new LowerCaseFilter(result); - result = new ThaiWordFilter(result); - return new TokenStreamComponents(source, new StopFilter(result, stopwords)); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); + result = new LowerCaseFilter(matchVersion, result); + result = new ThaiWordFilter(matchVersion, result); + return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords)); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java index 7eb1eda5b5a..c387333ff50 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.util.CharArrayIterator; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** * {@link TokenFilter} that use {@link java.text.BreakIterator} to break each @@ -60,7 +61,7 @@ public final class ThaiWordFilter extends TokenFilter { private boolean hasIllegalOffsets = false; // only if the length changed before this filter /** Creates a new ThaiWordFilter with the specified match version. 
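The onOrAfter branch above is the observable effect of matchVersion for ThaiAnalyzer; a sketch (LUCENE_4_7 is just an example of a pre-4.8 constant):

    import org.apache.lucene.analysis.th.ThaiAnalyzer;
    import org.apache.lucene.util.Version;

    // 4.8+ takes the ThaiTokenizer path:
    ThaiAnalyzer current = new ThaiAnalyzer(Version.LUCENE_CURRENT);
    // Anything older selects the legacy StandardTokenizer + ThaiWordFilter chain:
    ThaiAnalyzer legacy = new ThaiAnalyzer(Version.LUCENE_4_7);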
*/ - public ThaiWordFilter(TokenStream input) { + public ThaiWordFilter(Version matchVersion, TokenStream input) { super(input); if (!DBBI_AVAILABLE) throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation"); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java index 154187e2f6a..699af7bf5a2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java @@ -41,6 +41,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory { /** Creates a new ThaiWordFilterFactory */ public ThaiWordFilterFactory(Map args) { super(args); + assureMatchVersion(); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -48,7 +49,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory { @Override public ThaiWordFilter create(TokenStream input) { - return new ThaiWordFilter(input); + return new ThaiWordFilter(luceneMatchVersion, input); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java index 90d75abe44b..0c8842bbfe5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java @@ -38,7 +38,6 @@ import org.tartarus.snowball.ext.TurkishStemmer; */ public final class TurkishAnalyzer extends StopwordAnalyzerBase { private final CharArraySet stemExclusionSet; - private final Version matchVersion; /** File containing default Turkish stopwords. 
*/ public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; @@ -102,9 +101,9 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase { * @param stemExclusionSet a set of terms not to be stemmed */ public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { - super(stopwords); - this.matchVersion = matchVersion; - this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + super(matchVersion, stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy( + matchVersion, stemExclusionSet)); } /** @@ -121,12 +120,12 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase { */ @Override protected TokenStreamComponents createComponents(String fieldName) { - final Tokenizer source = new StandardTokenizer(); - TokenStream result = new StandardFilter(source); + final Tokenizer source = new StandardTokenizer(matchVersion); + TokenStream result = new StandardFilter(matchVersion, source); if(matchVersion.onOrAfter(Version.LUCENE_4_8)) result = new ApostropheFilter(result); result = new TurkishLowerCaseFilter(result); - result = new StopFilter(result, stopwords); + result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new TurkishStemmer()); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java index 325e5dbad22..5234440d0ac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java @@ -238,10 +238,12 @@ public abstract class AbstractAnalysisFactory { if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start - words = new CharArraySet(files.size() * 10, ignoreCase); + words = new CharArraySet(luceneMatchVersion, + files.size() * 10, ignoreCase); for (String file : files) { List wlist = getLines(loader, file.trim()); - words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); + words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, + ignoreCase)); } } return words; @@ -264,7 +266,8 @@ public abstract class AbstractAnalysisFactory { if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start - words = new CharArraySet(files.size() * 10, ignoreCase); + words = new CharArraySet(luceneMatchVersion, + files.size() * 10, ignoreCase); for (String file : files) { InputStream stream = null; Reader reader = null; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java index 7529d93d8df..f867cf7ea88 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java @@ -25,6 +25,8 @@ import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.util.CharacterUtils; +import org.apache.lucene.util.Version; + /** * A simple class that stores key Strings as char[]'s in a @@ -34,6 +36,19 @@ import org.apache.lucene.analysis.util.CharacterUtils; * etc. 
It is designed to be quick to retrieve items * by char[] keys without the necessity of converting * to a String first. + * + * + *
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArrayMap}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *        properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be + * lowercased correctly due to the lack of Unicode 4 + * support in JDK 1.4. To use instances of + * {@link CharArrayMap} with the behavior before Lucene + * 3.1 pass a {@link Version} < 3.1 to the constructors. */ public class CharArrayMap extends AbstractMap { // private only because missing generics @@ -43,12 +58,16 @@ public class CharArrayMap extends AbstractMap { private final CharacterUtils charUtils; private boolean ignoreCase; private int count; + final Version matchVersion; // package private because used in CharArraySet char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator /** * Create map with enough capacity to hold startSize terms - * + * + * @param matchVersion + * compatibility match version see Version + * note above for details. * @param startSize * the initial capacity * @param ignoreCase @@ -56,27 +75,31 @@ public class CharArrayMap extends AbstractMap { * otherwise true. */ @SuppressWarnings("unchecked") - public CharArrayMap(int startSize, boolean ignoreCase) { + public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) { this.ignoreCase = ignoreCase; int size = INIT_SIZE; while(startSize + (startSize>>2) > size) size <<= 1; keys = new char[size][]; values = (V[]) new Object[size]; - this.charUtils = CharacterUtils.getInstance(); + this.charUtils = CharacterUtils.getInstance(matchVersion); + this.matchVersion = matchVersion; } /** * Creates a map from the mappings in another map. - * + * + * @param matchVersion + * compatibility match version see Version + * note above for details. * @param c * a map whose mappings to be copied * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArrayMap(Map c, boolean ignoreCase) { - this(c.size(), ignoreCase); + public CharArrayMap(Version matchVersion, Map c, boolean ignoreCase) { + this(matchVersion, c.size(), ignoreCase); putAll(c); } @@ -87,6 +110,7 @@ public class CharArrayMap extends AbstractMap { this.ignoreCase = toCopy.ignoreCase; this.count = toCopy.count; this.charUtils = toCopy.charUtils; + this.matchVersion = toCopy.matchVersion; } /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */ @@ -541,7 +565,18 @@ public class CharArrayMap extends AbstractMap { /** * Returns a copy of the given map as a {@link CharArrayMap}. If the given map * is a {@link CharArrayMap} the ignoreCase property will be preserved. + *

+ * Note: If you intend to create a copy of another {@link CharArrayMap} where + * the {@link Version} of the source map differs from its copy, + * {@link #CharArrayMap(Version, Map, boolean)} should be used instead. + * The {@link #copy(Version, Map)} will preserve the {@link Version} of the + * source map if it is an instance of {@link CharArrayMap}. + *

* + * @param matchVersion + * compatibility match version see Version + * note above for details. This argument will be ignored if the + * given map is a {@link CharArrayMap}. * @param map * a map to copy * @return a copy of the given map as a {@link CharArrayMap}. If the given map @@ -549,7 +584,7 @@ public class CharArrayMap extends AbstractMap { * matchVersion will be of the given map will be preserved. */ @SuppressWarnings("unchecked") - public static CharArrayMap copy(final Map map) { + public static CharArrayMap copy(final Version matchVersion, final Map map) { if(map == EMPTY_MAP) return emptyMap(); if(map instanceof CharArrayMap) { @@ -565,7 +600,7 @@ public class CharArrayMap extends AbstractMap { m.values = values; return m; } - return new CharArrayMap<>(map, false); + return new CharArrayMap<>(matchVersion, map, false); } /** Returns an empty, unmodifiable map. */ @@ -624,7 +659,7 @@ public class CharArrayMap extends AbstractMap { */ private static final class EmptyCharArrayMap extends UnmodifiableCharArrayMap { EmptyCharArrayMap() { - super(new CharArrayMap(0, false)); + super(new CharArrayMap(Version.LUCENE_CURRENT, 0, false)); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java index 4b9b264eef5..109f2472867 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java @@ -22,6 +22,9 @@ import java.util.Collection; import java.util.Iterator; import java.util.Set; +import org.apache.lucene.util.Version; + + /** * A simple class that stores Strings as char[]'s in a * hash table. Note that this is not a general purpose @@ -31,6 +34,18 @@ import java.util.Set; * is in the set without the necessity of converting it * to a String first. * + * + *
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArraySet}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *        properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be + * lowercased correctly due to the lack of Unicode 4 + * support in JDK 1.4. To use instances of + * {@link CharArraySet} with the behavior before Lucene + * 3.1 pass a {@link Version} < 3.1 to the constructors. *
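A sketch of the version-aware set in use (illustrative values; signatures as restored in the hunks below):

    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 8, true); // ignoreCase
    set.add("Foo");
    boolean hit = set.contains("foo"); // true, matched case-insensitively

    // copy() keeps the source set's Version; the argument is ignored for CharArraySet inputs:
    CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, set);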

* Please note: This class implements {@link java.util.Set Set} but * does not behave like it should in all cases. The generic type is @@ -49,27 +64,33 @@ public class CharArraySet extends AbstractSet { /** * Create set with enough capacity to hold startSize terms * + * @param matchVersion + * compatibility match version see Version + * note above for details. * @param startSize * the initial capacity * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArraySet(int startSize, boolean ignoreCase) { - this(new CharArrayMap<>(startSize, ignoreCase)); + public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) { + this(new CharArrayMap<>(matchVersion, startSize, ignoreCase)); } /** * Creates a set from a Collection of objects. * + * @param matchVersion + * compatibility match version see Version + * note above for details. * @param c * a collection whose elements to be placed into the set * @param ignoreCase * false if and only if the set should be case sensitive * otherwise true. */ - public CharArraySet(Collection c, boolean ignoreCase) { - this(c.size(), ignoreCase); + public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) { + this(matchVersion, c.size(), ignoreCase); addAll(c); } @@ -151,21 +172,32 @@ public class CharArraySet extends AbstractSet { /** * Returns a copy of the given set as a {@link CharArraySet}. If the given set * is a {@link CharArraySet} the ignoreCase property will be preserved. + *

+ * Note: If you intend to create a copy of another {@link CharArraySet} where + * the {@link Version} of the source set differs from its copy, + * {@link #CharArraySet(Version, Collection, boolean)} should be used instead. + * The {@link #copy(Version, Set)} will preserve the {@link Version} of the + * source set if it is an instance of {@link CharArraySet}. + *

* + * @param matchVersion + * compatibility match version see Version + * note above for details. This argument will be ignored if the + * given set is a {@link CharArraySet}. * @param set * a set to copy * @return a copy of the given set as a {@link CharArraySet}. If the given set * is a {@link CharArraySet} the ignoreCase property as well as the * matchVersion will be of the given set will be preserved. */ - public static CharArraySet copy(final Set set) { + public static CharArraySet copy(final Version matchVersion, final Set set) { if(set == EMPTY_SET) return EMPTY_SET; if(set instanceof CharArraySet) { final CharArraySet source = (CharArraySet) set; - return new CharArraySet(CharArrayMap.copy(source.map)); + return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map)); } - return new CharArraySet(set, false); + return new CharArraySet(matchVersion, set, false); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java index fd290a6c00f..bfa40a02af1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java @@ -18,12 +18,15 @@ package org.apache.lucene.analysis.util; */ import java.io.IOException; +import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.analysis.util.CharacterUtils; +import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer; /** @@ -33,18 +36,25 @@ public abstract class CharTokenizer extends Tokenizer { /** * Creates a new {@link CharTokenizer} instance + * + * @param matchVersion + * Lucene version to match */ - public CharTokenizer() { + public CharTokenizer(Version matchVersion) { + charUtils = CharacterUtils.getInstance(matchVersion); } /** * Creates a new {@link CharTokenizer} instance * + * @param matchVersion + * Lucene version to match * @param factory * the attribute factory to use for this {@link Tokenizer} */ - public CharTokenizer(AttributeFactory factory) { + public CharTokenizer(Version matchVersion, AttributeFactory factory) { super(factory); + charUtils = CharacterUtils.getInstance(matchVersion); } private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0; @@ -54,7 +64,7 @@ public abstract class CharTokenizer extends Tokenizer { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final CharacterUtils charUtils = CharacterUtils.getInstance(); + private final CharacterUtils charUtils; private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java index b864ca20c51..022bfe1cfc2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java @@ -34,25 +34,29 @@ public abstract class CharacterUtils { private static 
final Java5CharacterUtils JAVA_5 = new Java5CharacterUtils(); /** - * Returns a {@link CharacterUtils} implementation. + * Returns a {@link CharacterUtils} implementation according to the given + * {@link Version} instance. + * + * @param matchVersion + * a version instance * @return a {@link CharacterUtils} implementation according to the given * {@link Version} instance. */ - public static CharacterUtils getInstance() { + public static CharacterUtils getInstance(final Version matchVersion) { return JAVA_5; } - /** - * explicitly returns a version matching java 4 semantics - * @deprecated Only for n-gram backwards compat - */ - @Deprecated + /** explicitly returns a version matching java 4 semantics */ public static CharacterUtils getJava4Instance() { return JAVA_4; } /** * Returns the code point at the given index of the {@link CharSequence}. + * Depending on the {@link Version} passed to + * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior + * of {@link Character#codePointAt(char[], int)} as it would have been + * available on a Java 1.4 JVM or on a later virtual machine version. * * @param seq * a character sequence @@ -71,6 +75,10 @@ public abstract class CharacterUtils { /** * Returns the code point at the given index of the char array where only elements * with index less than the limit are used. + * Depending on the {@link Version} passed to + * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior + * of {@link Character#codePointAt(char[], int)} as it would have been + * available on a Java 1.4 JVM or on a later virtual machine version. * * @param chars * a character array @@ -180,7 +188,10 @@ public abstract class CharacterUtils { * the middle of a surrogate pair, even if there are remaining characters in * the {@link Reader}. *

- * This method guarantees + * Depending on the {@link Version} passed to + * {@link CharacterUtils#getInstance(Version)} this method implements + * supplementary character awareness when filling the given buffer. For all + * {@link Version} > 3.0 {@link #fill(CharacterBuffer, Reader, int)} guarantees * that the given {@link CharacterBuffer} will never contain a high surrogate * character as the last element in the buffer unless it is the last available * character in the reader. In other words, high and low surrogate pairs will diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java index de228ff40d0..1f5071a5382 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java @@ -32,15 +32,18 @@ import org.apache.lucene.util.Version; */ public abstract class FilteringTokenFilter extends TokenFilter { + protected final Version version; private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); private int skippedPositions; /** * Create a new {@link FilteringTokenFilter}. + * @param version the Lucene match version * @param in the {@link TokenStream} to consume */ - public FilteringTokenFilter(TokenStream in) { + public FilteringTokenFilter(Version version, TokenStream in) { super(in); + this.version = version; } /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */ diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java index ff1517e90d0..b98c33588c5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java @@ -24,6 +24,7 @@ import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * Base class for Analyzers that need to make use of stopword sets. @@ -36,6 +37,8 @@ public abstract class StopwordAnalyzerBase extends Analyzer { */ protected final CharArraySet stopwords; + protected final Version matchVersion; + /** * Returns the analyzer's stopword set or an empty set if the analyzer has no * stopwords @@ -50,20 +53,26 @@ public abstract class StopwordAnalyzerBase extends Analyzer { /** * Creates a new instance initialized with the given stopword set * + * @param version + * the Lucene version for cross version compatibility * @param stopwords * the analyzer's stopword set */ - protected StopwordAnalyzerBase(final CharArraySet stopwords) { + protected StopwordAnalyzerBase(final Version version, final CharArraySet stopwords) { + matchVersion = version; // analyzers should use char array set for stopwords! this.stopwords = stopwords == null ? 
CharArraySet.EMPTY_SET : CharArraySet - .unmodifiableSet(CharArraySet.copy(stopwords)); + .unmodifiableSet(CharArraySet.copy(version, stopwords)); } /** * Creates a new Analyzer with an empty stopword set + * + * @param version + * the Lucene version for cross version compatibility */ - protected StopwordAnalyzerBase() { - this(null); + protected StopwordAnalyzerBase(final Version version) { + this(version, null); } /** @@ -90,7 +99,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer { Reader reader = null; try { reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8); - return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase)); + return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase)); } finally { IOUtils.close(reader); } @@ -102,16 +111,20 @@ public abstract class StopwordAnalyzerBase extends Analyzer { * * @param stopwords * the stopwords file to load + * + * @param matchVersion + * the Lucene version for cross version compatibility * @return a CharArraySet containing the distinct stopwords from the given * file * @throws IOException * if loading the stopwords throws an {@link IOException} */ - protected static CharArraySet loadStopwordSet(File stopwords) throws IOException { + protected static CharArraySet loadStopwordSet(File stopwords, + Version matchVersion) throws IOException { Reader reader = null; try { reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8); - return WordlistLoader.getWordSet(reader); + return WordlistLoader.getWordSet(reader, matchVersion); } finally { IOUtils.close(reader); } @@ -123,14 +136,17 @@ public abstract class StopwordAnalyzerBase extends Analyzer { * @param stopwords * the stopwords reader to load * + * @param matchVersion + * the Lucene version for cross version compatibility * @return a CharArraySet containing the distinct stopwords from the given * reader * @throws IOException * if loading the stopwords throws an {@link IOException} */ - protected static CharArraySet loadStopwordSet(Reader stopwords) throws IOException { + protected static CharArraySet loadStopwordSet(Reader stopwords, + Version matchVersion) throws IOException { try { - return WordlistLoader.getWordSet(stopwords); + return WordlistLoader.getWordSet(stopwords, matchVersion); } finally { IOUtils.close(stopwords); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java index 26f03b896f8..8fec2c00d19 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** * Loader for text files that represent a list of stopwords. @@ -72,10 +73,11 @@ public class WordlistLoader { * Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
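A usage sketch of the restored constructor plumbing above, for orientation only: the class below is hypothetical (the name VersionedStopAnalyzer and its particular filter chain are illustrative), but every signature it calls is one re-added by this patch, and matchVersion/stopwords are the protected fields StopwordAnalyzerBase now exposes.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;

// Hypothetical subclass showing how the Version argument threads through.
public final class VersionedStopAnalyzer extends StopwordAnalyzerBase {

  public VersionedStopAnalyzer(Version matchVersion, CharArraySet stopwords) {
    super(matchVersion, stopwords); // copies the set, stores matchVersion for subclasses
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // matchVersion and stopwords are the protected fields inherited from the base class.
    Tokenizer source = new StandardTokenizer(matchVersion);
    TokenStream sink = new LowerCaseFilter(matchVersion, source);
    sink = new StopFilter(matchVersion, sink, stopwords);
    return new TokenStreamComponents(source, sink);
  }
}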
* * @param reader Reader containing the wordlist + * @param matchVersion the Lucene {@link Version} * @return A {@link CharArraySet} with the reader's words */ - public static CharArraySet getWordSet(Reader reader) throws IOException { - return getWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false)); + public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException { + return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); } /** @@ -86,10 +88,11 @@ public class WordlistLoader { * * @param reader Reader containing the wordlist * @param comment The string representing a comment. + * @param matchVersion the Lucene {@link Version} * @return A CharArraySet with the reader's words */ - public static CharArraySet getWordSet(Reader reader, String comment) throws IOException { - return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false)); + public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException { + return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); } /** @@ -167,10 +170,11 @@ public class WordlistLoader { *
* * @param reader Reader containing a Snowball stopword list + * @param matchVersion the Lucene {@link Version} * @return A {@link CharArraySet} with the reader's words */ - public static CharArraySet getSnowballWordSet(Reader reader) throws IOException { - return getSnowballWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false)); + public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException { + return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false)); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java index 49275c9328c..80b067e9052 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java @@ -31,14 +31,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new ArabicAnalyzer(); + new ArabicAnalyzer(TEST_VERSION_CURRENT); } /** * Some simple tests showing some features of the analyzer, how some regular forms will conflate */ public void testBasicFeatures() throws Exception { - ArabicAnalyzer a = new ArabicAnalyzer(); + ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "كبير", new String[] { "كبير" }); assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker @@ -59,7 +59,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Simple tests to show things are getting reset correctly, etc. */ public void testReusableTokenStream() throws Exception { - ArabicAnalyzer a = new ArabicAnalyzer(); + ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "كبير", new String[] { "كبير" }); assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker } @@ -68,7 +68,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Non-arabic text gets treated in a similar way as SimpleAnalyzer. */ public void testEnglishInput() throws Exception { - assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] { + assertAnalyzesTo(new ArabicAnalyzer(TEST_VERSION_CURRENT), "English text.", new String[] { "english", "text" }); } @@ -76,26 +76,26 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase { * Test that custom stopwords work, and are not case-sensitive. 
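For orientation, a sketch of loading a word-per-line stopword file through the restored getWordSet(Reader, Version) signature; the helper class and its File parameter are illustrative, and the try/finally mirrors the loadStopwordSet pattern shown in StopwordAnalyzerBase above.

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;

// Illustrative helper; not part of the patch.
public class StopwordFileExample {
  static CharArraySet load(File stopwordFile) throws IOException {
    Reader reader = null;
    try {
      // Decode as UTF-8, as the loadStopwordSet hunks above do.
      reader = IOUtils.getDecodingReader(stopwordFile, StandardCharsets.UTF_8);
      // The Version argument is threaded through to the CharArraySet the words land in.
      return WordlistLoader.getWordSet(reader, Version.LUCENE_CURRENT);
    } finally {
      IOUtils.close(reader);
    }
  }
}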
*/ public void testCustomStopwords() throws Exception { - CharArraySet set = new CharArraySet(asSet("the", "and", "a"), false); - ArabicAnalyzer a = new ArabicAnalyzer(set); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false); + ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set); assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" }); } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(asSet("ساهدهات"), false); - ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, set); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false); + ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" }); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" }); - a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET); + a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ArabicAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ArabicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java index 8768e290350..851db2b7c9a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java @@ -118,7 +118,7 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase { } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("ساهدهات"); MockTokenizer tokenStream = whitespaceMockTokenizer("ساهدهات"); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java index a6d188c2e90..e1579dc6d01 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java @@ -33,22 +33,22 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { * This test fails with NPE when the stopwords file is missing in classpath */ public void testResourcesAvailable() { - new BulgarianAnalyzer(); + new BulgarianAnalyzer(TEST_VERSION_CURRENT); } public void testStopwords() throws IOException { - Analyzer a = new BulgarianAnalyzer(); + Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"}); } public void testCustomStopwords() throws IOException { - Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET); + Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "Как се 
казваш?", new String[] {"как", "се", "казваш"}); } public void testReusableTokenStream() throws IOException { - Analyzer a = new BulgarianAnalyzer(); + Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "документи", new String[] {"документ"}); assertAnalyzesTo(a, "документ", new String[] {"документ"}); } @@ -57,7 +57,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { * Test some examples from the paper */ public void testBasicExamples() throws IOException { - Analyzer a = new BulgarianAnalyzer(); + Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "енергийни кризи", new String[] {"енергийн", "криз"}); assertAnalyzesTo(a, "Атомната енергия", new String[] {"атомн", "енерг"}); @@ -68,14 +68,14 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase { } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("строеве"); - Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET, set); + Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new BulgarianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new BulgarianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java index 39b0be00250..e176afafed6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java @@ -39,7 +39,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * common (and some rare) plural pattern is listed. 
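A sketch of the production-side setup that the Bulgarian stem-exclusion tests above exercise; the helper class name is illustrative, and Version.LUCENE_CURRENT stands in for whatever match version a caller would pass.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Illustrative helper; not part of the patch.
public class StemExclusionExample {
  public static Analyzer build() {
    // Restored ctor: CharArraySet(Version, startSize, ignoreCase); true = case-insensitive.
    CharArraySet exclusions = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    exclusions.add("строеве"); // this surface form will bypass the stemmer
    // Restored ctor: BulgarianAnalyzer(Version, stopwords, stemExclusionSet).
    return new BulgarianAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, exclusions);
  }
}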
*/ public void testMasculineNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(); + BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); // -и pattern assertAnalyzesTo(a, "град", new String[] {"град"}); @@ -105,7 +105,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test showing how feminine noun forms conflate */ public void testFeminineNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(); + BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "вест", new String[] {"вест"}); assertAnalyzesTo(a, "вестта", new String[] {"вест"}); @@ -118,7 +118,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * plural pattern is listed */ public void testNeuterNouns() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(); + BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); // -а pattern assertAnalyzesTo(a, "дърво", new String[] {"дърв"}); @@ -146,7 +146,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test showing how adjectival forms conflate */ public void testAdjectives() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(); + BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "красив", new String[] {"красив"}); assertAnalyzesTo(a, "красивия", new String[] {"красив"}); assertAnalyzesTo(a, "красивият", new String[] {"красив"}); @@ -162,7 +162,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { * Test some exceptional rules, implemented as rewrites. */ public void testExceptions() throws IOException { - BulgarianAnalyzer a = new BulgarianAnalyzer(); + BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT); // ци -> к assertAnalyzesTo(a, "собственик", new String[] {"собственик"}); @@ -217,7 +217,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase { } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("строеве"); MockTokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenStream.setReader(new StringReader("строевете строеве")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java index 3307fbd1c20..33c2075d87b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java @@ -130,7 +130,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - Analyzer a = new BrazilianAnalyzer(); + Analyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT); checkReuse(a, "boa", "boa"); checkReuse(a, "boainain", "boainain"); checkReuse(a, "boas", "boas"); @@ -138,15 +138,15 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } public void testStemExclusionTable() throws Exception { - BrazilianAnalyzer a = new BrazilianAnalyzer( - CharArraySet.EMPTY_SET, new CharArraySet(asSet("quintessência"), false)); + BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, + CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("quintessência"), false)); checkReuse(a, "quintessência", 
"quintessência"); // excluded words will be completely unchanged. } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("Brasília"); - Tokenizer tokenizer = new LowerCaseTokenizer(); + Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); tokenizer.setReader(new StringReader("Brasília Brasilia")); BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(tokenizer, set)); @@ -154,7 +154,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { } private void check(final String input, final String expected) throws Exception { - checkOneTerm(new BrazilianAnalyzer(), input, expected); + checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected); } private void checkReuse(Analyzer a, String input, String expected) throws Exception { @@ -163,7 +163,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new BrazilianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new BrazilianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } public void testEmptyTerm() throws IOException { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java index bc14adc7897..4d32666b9c1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java @@ -27,12 +27,12 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new CatalanAnalyzer(); + new CatalanAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new CatalanAnalyzer(); + Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "llengües", "llengu"); checkOneTerm(a, "llengua", "llengu"); @@ -42,21 +42,22 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase { /** test use of elisionfilter */ public void testContractions() throws IOException { - Analyzer a = new CatalanAnalyzer(); + Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "Diccionari de l'Institut d'Estudis Catalans", new String[] { "diccion", "inst", "estud", "catalan" }); } /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet(asSet("llengües"), false); - Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet); + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false); + Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT, + CatalanAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "llengües", "llengües"); checkOneTerm(a, "llengua", "llengu"); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CatalanAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CatalanAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java index fc25c5496c0..a7346e7db9e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java @@ -39,7 +39,7 @@ import org.apache.lucene.analysis.util.CharArraySet; * Most tests adopted from TestCJKTokenizer */ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { - private Analyzer analyzer = new CJKAnalyzer(); + private Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT); public void testJa1() throws IOException { assertAnalyzesTo(analyzer, "一二三四五六七八九十", @@ -209,7 +209,7 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer)); } @@ -255,7 +255,7 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); TokenFilter filter = new FakeStandardTokenizer(tokenizer); - filter = new StopFilter(filter, CharArraySet.EMPTY_SET); + filter = new StopFilter(TEST_VERSION_CURRENT, filter, CharArraySet.EMPTY_SET); filter = new CJKBigramFilter(filter); return new TokenStreamComponents(tokenizer, filter); } @@ -271,13 +271,13 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CJKAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CJKAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** blast some random strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new CJKAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); } public void testEmptyTerm() throws IOException { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java index 08684009d8e..600e369f4fa 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKBigramFilter.java @@ -29,7 +29,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(); + Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(t, new CJKBigramFilter(t)); } }; @@ -37,7 +37,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer unibiAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(); + Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(t, new CJKBigramFilter(t, 
0xff, true)); } @@ -67,7 +67,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(); + Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN)); } }; @@ -85,7 +85,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(); + Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(t, new CJKBigramFilter(t, 0xff, false)); } @@ -119,7 +119,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new StandardTokenizer(); + Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT); return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java index 9a2c9d9969a..e5faa279839 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java @@ -32,35 +32,35 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase { * This test fails with NPE when the stopwords file is missing in classpath */ public void testResourcesAvailable() { - new SoraniAnalyzer(); + new SoraniAnalyzer(TEST_VERSION_CURRENT); } public void testStopwords() throws IOException { - Analyzer a = new SoraniAnalyzer(); + Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"پیاو"}); } public void testCustomStopwords() throws IOException { - Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET); + Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET); assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"ئەم", "پیاو"}); } public void testReusableTokenStream() throws IOException { - Analyzer a = new SoraniAnalyzer(); + Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "پیاوە", new String[] {"پیاو"}); assertAnalyzesTo(a, "پیاو", new String[] {"پیاو"}); } public void testWithStemExclusionSet() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("پیاوە"); - Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET, set); + Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "پیاوە", new String[] { "پیاوە" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new SoraniAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SoraniAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java index ac2543d7bd5..4a405fc4d08 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniStemFilter.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer; * Test the Sorani Stemmer. */ public class TestSoraniStemFilter extends BaseTokenStreamTestCase { - SoraniAnalyzer a = new SoraniAnalyzer(); + SoraniAnalyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); public void testIndefiniteSingular() throws Exception { checkOneTerm(a, "پیاوێک", "پیاو"); // -ek diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java index 105f214a001..43300eb8968 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java @@ -29,15 +29,15 @@ import org.apache.lucene.analysis.util.CharArraySet; * Tests CommonGrams(Query)Filter */ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { - private static final CharArraySet commonWords = new CharArraySet(Arrays.asList( + private static final CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList( "s", "a", "b", "c", "d", "the", "of" ), false); public void testReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class); cgf.reset(); @@ -59,9 +59,9 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { public void testQueryReset() throws Exception { final String input = "How the s a brown s cow d like A B thing?"; - WhitespaceTokenizer wt = new WhitespaceTokenizer(); + WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf); CharTermAttribute term = wt.addAttribute(CharTermAttribute.class); @@ -93,7 +93,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter( + return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords))); } }; @@ -163,7 +163,8 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - return new TokenStreamComponents(tokenizer, new CommonGramsFilter(tokenizer, commonWords)); + return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT, + tokenizer, commonWords)); } }; @@ -251,7 +252,7 @@ public 
class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "How The s a brown s cow d like A B thing?"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - TokenFilter cgf = new CommonGramsFilter(wt, commonWords); + TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"}); @@ -264,7 +265,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "dog the"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "dog_the" }); } @@ -276,7 +277,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the dog"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_dog" }); } @@ -288,7 +289,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the" }); } @@ -300,7 +301,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "monster"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "monster" }); } @@ -312,7 +313,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { final String input = "the of"; MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false); wt.setReader(new StringReader(input)); - CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter nsf = new CommonGramsQueryFilter(cgf); assertTokenStreamContents(nsf, new String[] { "the_of" }); } @@ -324,7 +325,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); - CommonGramsFilter cgf = new CommonGramsFilter(t, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords); return new TokenStreamComponents(t, cgf); } }; @@ -336,7 +337,7 @@ public class CommonGramsFilterTest extends 
BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); - CommonGramsFilter cgf = new CommonGramsFilter(t, commonWords); + CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords); return new TokenStreamComponents(t, new CommonGramsQueryFilter(cgf)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index def0f39114a..6385ff74580 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -42,7 +42,7 @@ import org.xml.sax.InputSource; public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { private static CharArraySet makeDictionary(String... dictionary) { - return new CharArraySet(Arrays.asList(dictionary), true); + return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true); } public void testHyphenationCompoundWordsDA() throws Exception { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java index 24b9629e527..5fc558296d6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java @@ -31,7 +31,7 @@ import org.apache.lucene.util.BytesRef; public class TestAnalyzers extends BaseTokenStreamTestCase { public void testSimple() throws Exception { - Analyzer a = new SimpleAnalyzer(); + Analyzer a = new SimpleAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "foo", "bar" }); assertAnalyzesTo(a, "foo bar . FOO <> BAR", @@ -51,7 +51,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { } public void testNull() throws Exception { - Analyzer a = new WhitespaceAnalyzer(); + Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "FOO", "BAR" }); assertAnalyzesTo(a, "foo bar . 
FOO <> BAR", @@ -71,7 +71,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { } public void testStop() throws Exception { - Analyzer a = new StopAnalyzer(); + Analyzer a = new StopAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo", "bar", "foo", "bar" }); assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", @@ -94,12 +94,12 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testPayloadCopy() throws IOException { String s = "how now brown cow"; TokenStream ts; - ts = new WhitespaceTokenizer(); + ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT); ((Tokenizer)ts).setReader(new StringReader(s)); ts = new PayloadSetter(ts); verifyPayload(ts); - ts = new WhitespaceTokenizer(); + ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT); ((Tokenizer)ts).setReader(new StringReader(s)); ts = new PayloadSetter(ts); verifyPayload(ts); @@ -124,8 +124,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WhitespaceTokenizer(); - return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer)); + Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer)); } } @@ -134,8 +134,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WhitespaceTokenizer(); - return new TokenStreamComponents(tokenizer, new UpperCaseFilter(tokenizer)); + Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + return new TokenStreamComponents(tokenizer, new UpperCaseFilter(TEST_VERSION_CURRENT, tokenizer)); } } @@ -190,9 +190,10 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testLowerCaseFilterLowSurrogateLeftover() throws IOException { // test if the limit of the termbuffer is correctly used with supplementary // chars - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); tokenizer.setReader(new StringReader("BogustermBogusterm\udc16")); - LowerCaseFilter filter = new LowerCaseFilter(tokenizer); + LowerCaseFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, + tokenizer); assertTokenStreamContents(filter, new String[] {"bogustermbogusterm\udc16"}); filter.reset(); String highSurEndingUpper = "BogustermBoguster\ud801"; @@ -207,7 +208,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testLowerCaseTokenizer() throws IOException { StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); - LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "\ud801\udc44test" }); @@ -215,7 +216,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { public void testWhitespaceTokenizer() throws IOException { StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); - WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "\ud801\udc1ctest" }); @@ -223,17 +224,17 @@ public class 
TestAnalyzers extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new WhitespaceAnalyzer(), 1000*RANDOM_MULTIPLIER); - checkRandomData(random(), new SimpleAnalyzer(), 1000*RANDOM_MULTIPLIER); - checkRandomData(random(), new StopAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SimpleAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new StopAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new WhitespaceAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); - checkRandomData(random, new SimpleAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); - checkRandomData(random, new StopAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java index 52f80e561cf..65bc8b2bbcc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java @@ -47,7 +47,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; @SuppressCodecs("Direct") public class TestBugInSomething extends BaseTokenStreamTestCase { public void test() throws Exception { - final CharArraySet cas = new CharArraySet(3, false); + final CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false); cas.add("jjp"); cas.add("wlmwoknt"); cas.add("tcgyreo"); @@ -62,7 +62,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenFilter.ENGLISH_STOPSET, false, -65); - TokenFilter f = new CommonGramsFilter(t, cas); + TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas); return new TokenStreamComponents(t, f); } @@ -263,7 +263,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { } public void testCuriousWikipediaString() throws Exception { - final CharArraySet protWords = new CharArraySet(new HashSet<>( + final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>( Arrays.asList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false); final byte table[] = new byte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java index 70f60d108b0..e1ae2d55eb5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java @@ -40,16 +40,16 @@ import java.util.Random; public class 
TestClassicAnalyzer extends BaseTokenStreamTestCase { - private Analyzer a = new ClassicAnalyzer(); + private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT); public void testMaxTermLength() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); sa.setMaxTokenLength(5); assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}); } public void testMaxTermLength2() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"}); sa.setMaxTokenLength(5); @@ -113,7 +113,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { public void testLucene1140() throws Exception { try { - ClassicAnalyzer analyzer = new ClassicAnalyzer(); + ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "" }); } catch (NullPointerException e) { fail("Should not throw an NPE and it did"); @@ -123,7 +123,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { public void testDomainNames() throws Exception { // Current lucene should not show the bug - ClassicAnalyzer a2 = new ClassicAnalyzer(); + ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT); // domain names assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"}); @@ -137,7 +137,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { // 2.4 should not show the bug. But, alas, it's also obsolete, // so we check latest released (Robert's gonna break this on 4.0 soon :) ) - a2 = new ClassicAnalyzer(); + a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "" }); } @@ -244,7 +244,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { } public void testJava14BWCompatibility() throws Exception { - ClassicAnalyzer sa = new ClassicAnalyzer(); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" }); } @@ -253,7 +253,8 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { */ public void testWickedLongTerm() throws IOException { RAMDirectory dir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer())); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))); char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; Arrays.fill(chars, 'x'); @@ -299,7 +300,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { // maximum length term, and search on that term: doc = new Document(); doc.add(new TextField("content", bigTerm, Field.Store.NO)); - ClassicAnalyzer sa = new ClassicAnalyzer(); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); sa.setMaxTokenLength(100000); writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)); writer.addDocument(doc); @@ -313,12 +314,12 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ClassicAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new 
ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new ClassicAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java index 63df6be7cae..8d97f33d7fe 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java @@ -69,7 +69,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -89,7 +89,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -107,7 +107,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -126,7 +126,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -144,7 +144,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -163,7 +163,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java index 94599b19224..6de3c4a0fee 
100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java @@ -49,7 +49,8 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer())); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( + TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))); Document doc = new Document(); doc.add(new StringField("partnum", "Q36", Field.Store.YES)); @@ -71,10 +72,10 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { /* public void testPerFieldAnalyzer() throws Exception { - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer()); + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT)); analyzer.addAnalyzer("partnum", new KeywordAnalyzer()); - QueryParser queryParser = new QueryParser(, "description", analyzer); + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer); Query query = queryParser.parse("partnum:Q36 AND SPACE"); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index a0e020e27ec..f3972e84982 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -385,7 +385,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { put(CharArraySet.class, new ArgProducer() { @Override public Object create(Random random) { int num = random.nextInt(10); - CharArraySet set = new CharArraySet(num, random.nextBoolean()); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean()); for (int i = 0; i < num; i++) { // TODO: make nastier set.add(TestUtil.randomSimpleString(random)); @@ -494,7 +494,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { put(CharArrayMap.class, new ArgProducer() { @Override public Object create(Random random) { int num = random.nextInt(10); - CharArrayMap map = new CharArrayMap<>(num, random.nextBoolean()); + CharArrayMap map = new CharArrayMap<>(TEST_VERSION_CURRENT, num, random.nextBoolean()); for (int i = 0; i < num; i++) { // TODO: make nastier map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); @@ -619,7 +619,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { args[i] = stream; } else if (paramType == CommonGramsFilter.class) { // TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly - args[i] = new CommonGramsFilter(stream, newRandomArg(random, CharArraySet.class)); + args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, newRandomArg(random, CharArraySet.class)); } else { args[i] = newRandomArg(random, paramType); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index b31cf3910cb..bbb656c9794 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -41,7 +41,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - StandardTokenizer tokenizer = new StandardTokenizer(); + StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); tokenizer.setReader(new StringReader(input)); BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -50,7 +50,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -282,13 +282,13 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new StandardAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new StandardAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** blast some random large strings through the analyzer */ public void testRandomHugeStrings() throws Exception { Random random = random(); - checkRandomData(random, new StandardAnalyzer(), 100*RANDOM_MULTIPLIER, 8192); + checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192); } // Adds random graph after: @@ -298,7 +298,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer); return new TokenStreamComponents(tokenizer, tokenStream); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java index 605deedd758..9838fe1f8a2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java @@ -31,7 +31,7 @@ import java.util.HashSet; public class TestStopAnalyzer extends BaseTokenStreamTestCase { - private StopAnalyzer stop = new StopAnalyzer(); + private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT); private Set inValidTokens = new HashSet<>(); @Override @@ -59,8 +59,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { } public void testStopList() throws IOException { - CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false); - StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); + CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); + StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) { assertNotNull(stream); CharTermAttribute termAtt = 
stream.getAttribute(CharTermAttribute.class); @@ -75,8 +75,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { } public void testStopListPositions() throws IOException { - CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false); - StopAnalyzer newStop = new StopAnalyzer(stopWordsSet); + CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); + StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); String s = "This is a good test of the english stop analyzer with positions"; int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; try (TokenStream stream = newStop.tokenStream("test", s)) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java index af91866080a..972f8731912 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java @@ -37,20 +37,20 @@ public class TestStopFilter extends BaseTokenStreamTestCase { public void testExactCase() throws IOException { StringReader reader = new StringReader("Now is The Time"); - CharArraySet stopWords = new CharArraySet(asSet("is", "the", "Time"), false); + CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("is", "the", "Time"), false); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(reader); - TokenStream stream = new StopFilter(in, stopWords); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, in, stopWords); assertTokenStreamContents(stream, new String[] { "Now", "The" }); } public void testStopFilt() throws IOException { StringReader reader = new StringReader("Now is The Time"); String[] stopWords = new String[] { "is", "the", "Time" }; - CharArraySet stopSet = StopFilter.makeStopSet(stopWords); + CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(reader); - TokenStream stream = new StopFilter(in, stopSet); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, in, stopSet); assertTokenStreamContents(stream, new String[] { "Now", "The" }); } @@ -68,12 +68,12 @@ public class TestStopFilter extends BaseTokenStreamTestCase { log(sb.toString()); String stopWords[] = a.toArray(new String[0]); for (int i=0; i a0 = new ArrayList<>(); @@ -89,22 +89,22 @@ for (int i=0; i Set<String> stopTypes = asSet("<NUM>"); - final StandardTokenizer input = new StandardTokenizer(newAttributeFactory()); input.setReader(reader); - TokenStream stream = new TypeTokenFilter(input, stopTypes); + TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes); assertTokenStreamContents(stream, new String[]{"is", "palindrome", "while", "is", "not"}); } @@ -61,9 +61,9 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { // with increments StringReader reader = new StringReader(sb.toString()); - final StandardTokenizer input = new StandardTokenizer(); + final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT); input.setReader(reader); - TypeTokenFilter typeTokenFilter = new TypeTokenFilter(input, stopSet); + TypeTokenFilter
typeTokenFilter = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopSet); testPositons(typeTokenFilter); } @@ -85,9 +85,9 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { public void testTypeFilterWhitelist() throws IOException { StringReader reader = new StringReader("121 is palindrome, while 123 is not"); Set<String> stopTypes = Collections.singleton("<NUM>"); - final StandardTokenizer input = new StandardTokenizer(newAttributeFactory()); + final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); input.setReader(reader); - TokenStream stream = new TypeTokenFilter(input, stopTypes, true); + TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes, true); assertTokenStreamContents(stream, new String[]{"121", "123"}); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java index 0138c2f7d8c..75cfc09ca0f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailAnalyzer.java @@ -27,7 +27,7 @@ import java.util.Arrays; public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase { - private Analyzer a = new UAX29URLEmailAnalyzer(); + private Analyzer a = new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT); public void testHugeDoc() throws IOException { StringBuilder sb = new StringBuilder(); @@ -344,6 +344,6 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new UAX29URLEmailAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index fab1b752e5a..865e6c6b46f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -47,7 +47,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(input)); BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -56,7 +56,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); + Tokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -103,7 +103,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { private Analyzer urlAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { -
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // Tokenize arbitrary length URLs TokenFilter filter = new URLFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); @@ -113,7 +113,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { private Analyzer emailAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory()); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); TokenFilter filter = new EmailFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java index 91b0be7c3f8..e668a9da770 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.Version; /** * Test the CzechAnalyzer @@ -32,25 +33,25 @@ import org.apache.lucene.analysis.util.CharArraySet; public class TestCzechAnalyzer extends BaseTokenStreamTestCase { public void testStopWord() throws Exception { - assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem", + assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); } public void testReusableTokenStream() throws Exception { - Analyzer analyzer = new CzechAnalyzer(); + Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" }); assertAnalyzesTo(analyzer, "Česká Republika", new String[] { "česk", "republik" }); } public void testWithStemExclusionSet() throws IOException{ - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("hole"); - CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"}); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new CzechAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new CzechAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java index c7b4c7ee072..36cdd59f49f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java @@ -41,7 +41,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how masculine noun forms conflate */ public void 
testMasculineNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); /* animate ending with a hard consonant */ assertAnalyzesTo(cz, "pán", new String[] { "pán" }); @@ -109,7 +109,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how feminine noun forms conflate */ public void testFeminineNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); /* ending with hard consonant */ assertAnalyzesTo(cz, "kost", new String[] { "kost" }); @@ -153,7 +153,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how neuter noun forms conflate */ public void testNeuterNouns() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); /* ending with o */ assertAnalyzesTo(cz, "město", new String[] { "měst" }); @@ -196,7 +196,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test showing how adjectival forms conflate */ public void testAdjectives() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); /* ending with ý/á/é */ assertAnalyzesTo(cz, "mladý", new String[] { "mlad" }); @@ -224,7 +224,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test some possessive suffixes */ public void testPossessive() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(cz, "Karlův", new String[] { "karl" }); assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" }); } @@ -233,7 +233,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test some exceptional rules, implemented as rewrites. */ public void testExceptions() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); /* rewrite of št -> sk */ assertAnalyzesTo(cz, "český", new String[] { "česk" }); @@ -273,13 +273,13 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase { * Test that very short words are not stemmed. 
*/ public void testDontStem() throws IOException { - CzechAnalyzer cz = new CzechAnalyzer(); + CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(cz, "e", new String[] { "e" }); assertAnalyzesTo(cz, "zi", new String[] { "zi" }); } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet(1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("hole"); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(new StringReader("hole desek")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java index a0a591060b5..eddf531d0de 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java @@ -27,12 +27,12 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new DanishAnalyzer(); + new DanishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new DanishAnalyzer(); + Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "undersøg", "undersøg"); checkOneTerm(a, "undersøgelse", "undersøg"); @@ -42,8 +42,8 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("undersøgelse"), false); - Analyzer a = new DanishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false); + Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT, DanishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "undersøgelse", "undersøgelse"); checkOneTerm(a, "undersøg", "undersøg"); @@ -51,6 +51,6 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new DanishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new DanishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java index fd0cf20e583..731dc2b1789 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java @@ -28,16 +28,16 @@ import org.apache.lucene.analysis.util.CharArraySet; public class TestGermanAnalyzer extends BaseTokenStreamTestCase { public void testReusableTokenStream() throws Exception { - Analyzer a = new GermanAnalyzer(); + Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT); checkOneTerm(a, "Tisch", "tisch"); checkOneTerm(a, "Tische", "tisch"); checkOneTerm(a, "Tischen", "tisch"); } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet( 1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("fischen"); - final LowerCaseTokenizer in = new LowerCaseTokenizer(); + final 
LowerCaseTokenizer in = new LowerCaseTokenizer(TEST_VERSION_CURRENT); in.setReader(new StringReader("Fischen Trinken")); GermanStemFilter filter = new GermanStemFilter( new SetKeywordMarkerFilter(in, set)); @@ -45,8 +45,8 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { } public void testStemExclusionTable() throws Exception { - GermanAnalyzer a = new GermanAnalyzer( CharArraySet.EMPTY_SET, - new CharArraySet( asSet("tischen"), false)); + GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, + new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false)); checkOneTerm(a, "tischen", "tischen"); } @@ -54,7 +54,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer */ public void testGermanSpecials() throws Exception { - GermanAnalyzer a = new GermanAnalyzer(); + GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT); // a/o/u + e is equivalent to the umlaut form checkOneTerm(a, "Schaltflächen", "schaltflach"); checkOneTerm(a, "Schaltflaechen", "schaltflach"); @@ -62,6 +62,6 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new GermanAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new GermanAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java index b9a6bd31098..7ed23a0c161 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java @@ -49,7 +49,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java index a8b7e7b1e91..830cb7e52d5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java @@ -56,7 +56,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java index 1b49c409ccd..3132787f292 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java @@ -44,7 +44,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false); return new TokenStreamComponents(t, - new GermanStemFilter(new LowerCaseFilter(t))); + new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); } }; @@ -55,7 +55,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java index d416898cb08..86dae1ee6d1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java @@ -31,7 +31,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase { * @throws Exception in case an error occurs */ public void testAnalyzer() throws Exception { - Analyzer a = new GreekAnalyzer(); + Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT); // Verify the correct analysis of capitals and small accented letters, and // stemming assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", @@ -48,7 +48,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - Analyzer a = new GreekAnalyzer(); + Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT); // Verify the correct analysis of capitals and small accented letters, and // stemming assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", @@ -66,6 +66,6 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new GreekAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new GreekAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java index cd5a2c1b105..50813edaf2c 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; public class TestGreekStemmer extends BaseTokenStreamTestCase { - Analyzer a = new GreekAnalyzer(); + Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT); public void testMasculineNouns() throws Exception { // -ος diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java 
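The German hunks just above restore the version-taking LowerCaseFilter inside a stem chain. For orientation, a minimal sketch of a chain built the restored way, assuming the deprecated Version-taking constructors this patch re-adds; Version.LUCENE_4_9 is an illustrative value, not one the patch pins down:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.de.GermanStemFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    class VersionedChainSketch {
      // Same shape as the TestGermanStemFilter chain above, with an explicit
      // matchVersion in place of the test framework's TEST_VERSION_CURRENT.
      Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          Tokenizer t = new StandardTokenizer(Version.LUCENE_4_9);
          TokenStream f = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_4_9, t));
          return new TokenStreamComponents(t, f);
        }
      };
    }

The no-arg constructors remain the forward path; the Version overloads exist only so index-time behavior can be pinned across upgrades.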
index 3844cbd7960..82b2b036be1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java @@ -27,12 +27,12 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new EnglishAnalyzer(); + new EnglishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new EnglishAnalyzer(); + Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "books", "book"); checkOneTerm(a, "book", "book"); @@ -46,8 +46,8 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("books"), false); - Analyzer a = new EnglishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false); + Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT, EnglishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "books", "books"); checkOneTerm(a, "book", "book"); @@ -55,6 +55,6 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new EnglishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new EnglishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java index 36fbf4f5dfc..8c9b377ad19 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java @@ -53,7 +53,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase { } public void testWithKeywordAttribute() throws IOException { - CharArraySet set = new CharArraySet( 1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("yourselves"); Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("yourselves yours")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java index 9a6c06f0d31..2338906e810 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java @@ -27,12 +27,12 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new SpanishAnalyzer(); + new SpanishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new SpanishAnalyzer(); + Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "chicana", "chican"); checkOneTerm(a, "chicano", "chican"); @@ -42,8 +42,8 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { 
/** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("chicano"), false); - Analyzer a = new SpanishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false); + Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT, SpanishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "chicana", "chican"); checkOneTerm(a, "chicano", "chicano"); @@ -51,6 +51,6 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new SpanishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SpanishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java index d398ec9d437..ca9aa67c151 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java @@ -27,12 +27,12 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new BasqueAnalyzer(); + new BasqueAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new BasqueAnalyzer(); + Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "zaldi", "zaldi"); checkOneTerm(a, "zaldiak", "zaldi"); @@ -42,8 +42,8 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("zaldiak"), false); - Analyzer a = new BasqueAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false); + Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT, BasqueAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "zaldiak", "zaldiak"); checkOneTerm(a, "mendiari", "mendi"); @@ -51,6 +51,6 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new BasqueAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new BasqueAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java index 67dace3253c..64510cf40b5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java @@ -31,7 +31,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * This test fails with NPE when the stopwords file is missing in classpath */ public void testResourcesAvailable() { - new PersianAnalyzer(); + new PersianAnalyzer(TEST_VERSION_CURRENT); } /** @@ -42,7 +42,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar */ 
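Many of these hunks rebuild stopword and exclusion sets through the Version-taking CharArraySet constructor. A small sketch of the ignoreCase semantics the tests rely on; the class name and Version value are illustrative only:

    import java.util.Arrays;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    class StopSetSketch {
      // ignoreCase=false: lookups are case-sensitive, so "The" is not a hit.
      static final CharArraySet STOP =
          new CharArraySet(Version.LUCENE_4_9, Arrays.asList("the", "and", "a"), false);

      static boolean[] demo() {
        return new boolean[] { STOP.contains("the"), STOP.contains("The") }; // {true, false}
      }
    }

This mirrors testExactCase in TestStopFilter above, where the capitalized "Time" is stopped because it is in the set verbatim, while "The" survives because the set was built with ignoreCase=false.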
public void testBehaviorVerbs() throws Exception { - Analyzer a = new PersianAnalyzer(); + Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); // active present indicative assertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" }); // active preterite indicative @@ -118,7 +118,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar */ public void testBehaviorVerbsDefective() throws Exception { - Analyzer a = new PersianAnalyzer(); + Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); // active present indicative assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" }); // active preterite indicative @@ -189,7 +189,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * nouns, removing the plural -ha. */ public void testBehaviorNouns() throws Exception { - Analyzer a = new PersianAnalyzer(); + Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" }); assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" }); } @@ -199,7 +199,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * (lowercased, etc) */ public void testBehaviorNonPersian() throws Exception { - Analyzer a = new PersianAnalyzer(); + Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "English test.", new String[] { "english", "test" }); } @@ -207,7 +207,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * Basic test ensuring that tokenStream works correctly. */ public void testReusableTokenStream() throws Exception { - Analyzer a = new PersianAnalyzer(); + Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" }); assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" }); } @@ -216,14 +216,14 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase { * Test that custom stopwords work, and are not case-sensitive. 
*/ public void testCustomStopwords() throws Exception { - PersianAnalyzer a = new PersianAnalyzer( - new CharArraySet( asSet("the", "and", "a"), false)); + PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, + new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false)); assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new PersianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new PersianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java index 3fb7ce85369..e3ef862e6a2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java @@ -27,12 +27,12 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new FinnishAnalyzer(); + new FinnishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new FinnishAnalyzer(); + Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "edeltäjiinsä", "edeltäj"); checkOneTerm(a, "edeltäjistään", "edeltäj"); @@ -42,8 +42,8 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("edeltäjistään"), false); - Analyzer a = new FinnishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false); + Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT, FinnishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "edeltäjiinsä", "edeltäj"); checkOneTerm(a, "edeltäjistään", "edeltäjistään"); @@ -51,6 +51,6 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new FinnishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new FinnishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java index 985ec1abc5c..2b02b8c0e2b 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java @@ -48,7 +48,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("edeltäjistään"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java index c4936ae918d..6680b7e57f2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java @@ -32,7 +32,7 @@ import org.apache.lucene.util.Version; public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { public void testAnalyzer() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(); + FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(fa, "", new String[] { }); @@ -115,7 +115,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { } public void testReusableTokenStream() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(); + FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); // stopwords assertAnalyzesTo( fa, @@ -136,20 +136,20 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { } public void testExclusionTableViaCtor() throws Exception { - CharArraySet set = new CharArraySet( 1, true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("habitable"); - FrenchAnalyzer fa = new FrenchAnalyzer( + FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", "chist" }); - fa = new FrenchAnalyzer( CharArraySet.EMPTY_SET, set); + fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", "chist" }); } public void testElision() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(); + FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouil" }); } @@ -157,18 +157,18 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { * Test that stopwords are not case sensitive */ public void testStopwordsCasing() throws IOException { - FrenchAnalyzer a = new FrenchAnalyzer(); + FrenchAnalyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "Votre", new String[] { }); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new FrenchAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new FrenchAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** test accent-insensitive */ public void testAccentInsensitive() throws Exception { - Analyzer a = new FrenchAnalyzer(); + Analyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT); checkOneTerm(a, "sécuritaires", "securitair"); checkOneTerm(a, "securitaires", "securitair"); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java index ce0a038fa82..354a252f640 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java @@ -179,7 +179,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("chevaux"), false); + final CharArraySet 
exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java index 5330e0ed67c..62e06cbc523 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java @@ -58,7 +58,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("chevaux"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java index 994dff5adda..8db7c66e5b1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java @@ -27,12 +27,12 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new IrishAnalyzer(); + new IrishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new IrishAnalyzer(); + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "siopadóireacht", "siopadóir"); checkOneTerm(a, "síceapatacha", "síceapaite"); @@ -42,15 +42,15 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { /** test use of elisionfilter */ public void testContractions() throws IOException { - Analyzer a = new IrishAnalyzer(); + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "b'fhearr m'athair", new String[] { "fearr", "athair" }); } /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("feirmeoireacht"), false); - Analyzer a = new IrishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("feirmeoireacht"), false); + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT, IrishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "feirmeoireacht", "feirmeoireacht"); checkOneTerm(a, "siopadóireacht", "siopadóir"); @@ -58,7 +58,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { /** test special hyphen handling */ public void testHyphens() throws IOException { - Analyzer a = new IrishAnalyzer(); + Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT); assertAnalyzesTo(a, "n-athair", new String[] { "athair" }, new int[] { 2 }); @@ -66,6 +66,6 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new IrishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new IrishAnalyzer(TEST_VERSION_CURRENT), 
1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java index 3d5e47e08d5..0ce5d21aaba 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java @@ -27,12 +27,12 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new GalicianAnalyzer(); + new GalicianAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new GalicianAnalyzer(); + Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "correspondente", "correspond"); checkOneTerm(a, "corresponderá", "correspond"); @@ -42,8 +42,8 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("correspondente"), false); - Analyzer a = new GalicianAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false); + Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT, GalicianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "correspondente", "correspondente"); checkOneTerm(a, "corresponderá", "correspond"); @@ -51,6 +51,6 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new GalicianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new GalicianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java index b309bb422a5..8980d07e2a3 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java @@ -54,7 +54,7 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("elefantes"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java index be9eadaf9de..bfb4f77f543 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java @@ -28,25 +28,25 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new HindiAnalyzer(); + new HindiAnalyzer(TEST_VERSION_CURRENT); } 
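The exclusion-set constructors being re-versioned in these language analyzers all behave the same way: terms in the set bypass the stemmer. A sketch using the Galician analyzer from the hunk above; Version.LUCENE_4_9 is again only an illustrative value:

    import java.util.Arrays;
    import org.apache.lucene.analysis.gl.GalicianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    class ExclusionSketch {
      // "correspondente" passes through unstemmed; other forms still
      // conflate to "correspond", exactly as testExclude asserts.
      static GalicianAnalyzer build() {
        CharArraySet keep =
            new CharArraySet(Version.LUCENE_4_9, Arrays.asList("correspondente"), false);
        return new GalicianAnalyzer(Version.LUCENE_4_9,
            GalicianAnalyzer.getDefaultStopSet(), keep);
      }
    }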
public void testBasics() throws Exception { - Analyzer a = new HindiAnalyzer(); + Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT); // two ways to write 'hindi' itself. checkOneTerm(a, "हिन्दी", "हिंद"); checkOneTerm(a, "हिंदी", "हिंद"); } public void testExclusionSet() throws Exception { - CharArraySet exclusionSet = new CharArraySet( asSet("हिंदी"), false); - Analyzer a = new HindiAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false); + Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT, HindiAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "हिंदी", "हिंदी"); } /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new HindiAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new HindiAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java index 5caff3fb30d..a395def06ac 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java @@ -27,12 +27,12 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new HungarianAnalyzer(); + new HungarianAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new HungarianAnalyzer(); + Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "babakocsi", "babakocs"); checkOneTerm(a, "babakocsijáért", "babakocs"); @@ -42,8 +42,8 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("babakocsi"), false); - Analyzer a = new HungarianAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false); + Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT, HungarianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "babakocsi", "babakocsi"); checkOneTerm(a, "babakocsijáért", "babakocs"); @@ -51,6 +51,6 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new HungarianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new HungarianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java index 46478894113..38213e068ff 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java @@ -49,7 +49,7 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("babakocsi"), false); + final CharArraySet 
exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java index 9deaca10795..5656f6ee753 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java @@ -64,7 +64,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase { // assert with keyword marker tokenizer = whitespaceMockTokenizer("lucene is awesome"); - CharArraySet set = new CharArraySet( Arrays.asList("Lucene"), true); + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true); filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary); assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1}); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java index 2e046189d01..ef74e391b46 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java @@ -27,12 +27,12 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new ArmenianAnalyzer(); + new ArmenianAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new ArmenianAnalyzer(); + Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "արծիվ", "արծ"); checkOneTerm(a, "արծիվներ", "արծ"); @@ -42,8 +42,8 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("արծիվներ"), false); - Analyzer a = new ArmenianAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false); + Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT, ArmenianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "արծիվներ", "արծիվներ"); checkOneTerm(a, "արծիվ", "արծ"); @@ -51,6 +51,6 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ArmenianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ArmenianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java index a134b31c015..ce3cd6edd20 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java @@ -27,12 +27,12 @@ public class TestIndonesianAnalyzer extends 
BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new IndonesianAnalyzer(); + new IndonesianAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new IndonesianAnalyzer(); + Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "peledakan", "ledak"); checkOneTerm(a, "pembunuhan", "bunuh"); @@ -42,8 +42,8 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("peledakan"), false); - Analyzer a = new IndonesianAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false); + Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT, IndonesianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "peledakan", "peledakan"); checkOneTerm(a, "pembunuhan", "bunuh"); @@ -51,6 +51,6 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new IndonesianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new IndonesianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java index 4fe6f96ecd5..c93781cf7e1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java @@ -30,12 +30,12 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new ItalianAnalyzer(); + new ItalianAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new ItalianAnalyzer(); + Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "abbandonata", "abbandonat"); checkOneTerm(a, "abbandonati", "abbandonat"); @@ -45,8 +45,8 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("abbandonata"), false); - Analyzer a = new ItalianAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false); + Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT, ItalianAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "abbandonata", "abbandonata"); checkOneTerm(a, "abbandonati", "abbandonat"); @@ -54,12 +54,12 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new ItalianAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new ItalianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } /** test that the elisionfilter is working */ public void testContractions() throws IOException { - Analyzer a = new ItalianAnalyzer(); + Analyzer a = new 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
index 4fe6f96ecd5..c93781cf7e1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
@@ -30,12 +30,12 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ItalianAnalyzer();
+    new ItalianAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new ItalianAnalyzer();
+    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "abbandonata", "abbandonat");
     checkOneTerm(a, "abbandonati", "abbandonat");
@@ -45,8 +45,8 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet( asSet("abbandonata"), false);
-    Analyzer a = new ItalianAnalyzer(
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
+    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
         ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "abbandonata", "abbandonata");
     checkOneTerm(a, "abbandonati", "abbandonat");
@@ -54,12 +54,12 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ItalianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new ItalianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 
   /** test that the elisionfilter is working */
   public void testContractions() throws IOException {
-    Analyzer a = new ItalianAnalyzer();
+    Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
     assertAnalyzesTo(a, "l'Italiano", new String[] { "italian" });
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
index 4bf69a503cf..33ac2e3f12b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new LatvianAnalyzer();
+    new LatvianAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new LatvianAnalyzer();
+    Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "tirgiem", "tirg");
     checkOneTerm(a, "tirgus", "tirg");
@@ -42,8 +42,8 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet( asSet("tirgiem"), false);
-    Analyzer a = new LatvianAnalyzer(
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
+    Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT,
         LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "tirgiem", "tirgiem");
     checkOneTerm(a, "tirgus", "tirg");
@@ -51,6 +51,6 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new LatvianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new LatvianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
index 0d1141ef77a..96998d5d046 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@@ -37,7 +37,7 @@ import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
 /** Tests {@link CapitalizationFilter} */
 public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
   public void testCapitalization() throws Exception {
-    CharArraySet keep = new CharArraySet(
+    CharArraySet keep = new CharArraySet(TEST_VERSION_CURRENT,
         Arrays.asList("and", "the", "it", "BIG"), false);
 
     assertCapitalizesTo("kiTTEN", new String[] { "Kitten" },
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
index 442cfe2b54f..2a158ec2367 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
@@ -31,7 +31,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = whitespaceMockTokenizer("short toolong evenmuchlongertext a ab toolong foo");
-    CodepointCountFilter filter = new CodepointCountFilter(stream, 2, 6);
+    CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, 2, 6);
     assertTokenStreamContents(filter,
       new String[]{"short", "ab", "foo"},
       new int[]{1, 4, 2}
@@ -43,7 +43,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(tokenizer, 0, 5));
+        return new TokenStreamComponents(tokenizer, new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
       }
     };
     checkOneTerm(a, "", "");
@@ -63,7 +63,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
     boolean expected = count >= min && count <= max;
     TokenStream stream = new KeywordTokenizer();
     ((Tokenizer)stream).setReader(new StringReader(text));
-    stream = new CodepointCountFilter(stream, min, max);
+    stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
     stream.reset();
     assertEquals(expected, stream.incrementToken());
     stream.end();
@@ -76,6 +76,6 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
    */
   @Test(expected = IllegalArgumentException.class)
   public void testIllegalArguments() throws Exception {
-    new CodepointCountFilter(whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
+    new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
index 50b5edcd717..465c54c31f1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
@@ -40,12 +40,12 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
 
     // Test Stopwords
     TokenStream stream = whitespaceMockTokenizer(input);
-    stream = new KeepWordFilter(stream, new CharArraySet( words, true));
+    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
     assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
 
     // Now force case
     stream = whitespaceMockTokenizer(input);
-    stream = new KeepWordFilter(stream, new CharArraySet(words, false));
+    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
     assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
   }
 
@@ -60,7 +60,7 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        TokenStream stream = new KeepWordFilter(tokenizer, new CharArraySet( words, true));
+        TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
         return new TokenStreamComponents(tokenizer, stream);
       }
     };
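KeepWordFilter is the inverse of a stop filter: only terms present in the set survive, and dropped terms leave position-increment gaps. A short standalone sketch under the same 4.x API assumptions (word list and input are illustrative):

    import java.io.StringReader;
    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class KeepWordSketch {
      public static TokenStream build() {
        WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("aaa BBB walk talk"));
        // ignoreCase = true, so "BBB" matches the entry "bbb"
        CharArraySet keep =
            new CharArraySet(Version.LUCENE_4_9, Arrays.asList("aaa", "bbb"), true);
        return new KeepWordFilter(Version.LUCENE_4_9, tok, keep); // drops "walk", "talk"
      }
    }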
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
index eecc9e81aa1..a2853060d9a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@@ -38,13 +38,13 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
 
   @Test
   public void testSetFilterIncrementToken() throws IOException {
-    CharArraySet set = new CharArraySet( 5, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
     set.add("lucenefox");
     String[] output = new String[] { "the", "quick", "brown", "LuceneFox", "jumps" };
     assertTokenStreamContents(new LowerCaseFilterMock(
         new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), set)), output);
-    CharArraySet mixedCaseSet = new CharArraySet( asSet("LuceneFox"), false);
+    CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
     assertTokenStreamContents(new LowerCaseFilterMock(
         new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), mixedCaseSet)), output);
     CharArraySet set2 = set;
@@ -72,8 +72,8 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
         new SetKeywordMarkerFilter(
             new SetKeywordMarkerFilter(
                 whitespaceMockTokenizer("Dogs Trees Birds Houses"),
-                new CharArraySet( asSet("Birds", "Houses"), false)),
-            new CharArraySet( asSet("Dogs", "Trees"), false)));
+                new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)),
+            new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
 
     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
 
@@ -91,7 +91,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
             new PatternKeywordMarkerFilter(
                 whitespaceMockTokenizer("Dogs Trees Birds Houses"),
                 Pattern.compile("Birds|Houses")),
-            new CharArraySet( asSet("Dogs", "Trees"), false)));
+            new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
 
     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
index 89e377f4fc7..0aa47149601 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
@@ -33,7 +33,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
 
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = whitespaceMockTokenizer("short toolong evenmuchlongertext a ab toolong foo");
-    LengthFilter filter = new LengthFilter(stream, 2, 6);
+    LengthFilter filter = new LengthFilter(TEST_VERSION_CURRENT, stream, 2, 6);
     assertTokenStreamContents(filter,
       new String[]{"short", "ab", "foo"},
       new int[]{1, 4, 2}
@@ -45,7 +45,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new LengthFilter(tokenizer, 0, 5));
+        return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
       }
     };
     checkOneTerm(a, "", "");
@@ -56,6 +56,6 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
    */
   @Test(expected = IllegalArgumentException.class)
   public void testIllegalArguments() throws Exception {
-    new LengthFilter(whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
+    new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
index 7a20192c557..75a9ab067a6 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java
@@ -211,7 +211,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
   @Test
   public void testPositionIncrements() throws Exception {
     final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
-    final CharArraySet protWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false);
+    final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
     Analyzer a = new Analyzer() {
@@ -300,7 +300,8 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
       @Override
       public TokenStreamComponents createComponents(String field) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
+        StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
+            tokenizer, StandardAnalyzer.STOP_WORDS_SET);
         return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(filter, flags, protWords));
       }
     };
@@ -332,7 +333,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = random().nextInt(512);
     final CharArraySet protectedWords;
     if (random().nextBoolean()) {
-      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
@@ -355,7 +356,7 @@ public class TestLucene47WordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = i;
     final CharArraySet protectedWords;
     if (random.nextBoolean()) {
-      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
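The StopFilter lines above are the same Version restoration applied to stopword removal. Sketched in isolation, again assuming the 4.x API (input text is illustrative; with recent matchVersions the filter always leaves a position-increment gap where a stopword was removed):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    public class StopFilterSketch {
      public static TokenStream build() {
        WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("the quick brown fox"));
        // "the" is dropped; the following token keeps a position increment of 2
        return new StopFilter(Version.LUCENE_4_9, tok, StandardAnalyzer.STOP_WORDS_SET);
      }
    }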
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
index ff833866472..af69fa851e2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
@@ -39,10 +39,10 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
     String text = "Qwerty";
 
     Map<String,Analyzer> analyzerPerField =
-        Collections.singletonMap("special", new SimpleAnalyzer());
+        Collections.singletonMap("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
 
     PerFieldAnalyzerWrapper analyzer =
-      new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
+      new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
 
     try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
       CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
@@ -72,8 +72,8 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
   public void testReuseWrapped() throws Exception {
     final String text = "Qwerty";
 
-    final Analyzer specialAnalyzer = new SimpleAnalyzer();
-    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
+    final Analyzer specialAnalyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
+    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
 
     TokenStream ts1, ts2, ts3, ts4;
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
index 4ef1536f0d9..a75bfa038cd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@@ -113,7 +113,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
         output.add(entry.getValue());
       }
     }
-    Tokenizer tokenizer = new WhitespaceTokenizer();
+    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
     tokenizer.setReader(new StringReader(input.toString()));
     TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
         tokenizer, builder.build()));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
index 1d278edcc2c..c770de15bbf 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
@@ -51,7 +51,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
                     new Token(new String(ccc, 0, ccc.length), 11, 15),
                     new Token(new String(whitespace, 0, whitespace.length), 16, 20),
                     new Token(new String(empty, 0, empty.length), 21, 21));
-    ts = new TrimFilter(ts);
+    ts = new TrimFilter(TEST_VERSION_CURRENT, ts);
 
     assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
   }
@@ -100,7 +100,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer));
+        return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
@@ -111,7 +111,8 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer));
+        final Version version = TEST_VERSION_CURRENT;
+        return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
       }
     };
     checkOneTerm(a, "", "");
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
index 65ed12423cc..787c9b9ab92 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
@@ -207,7 +207,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
   @Test
   public void testPositionIncrements() throws Exception {
     final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
-    final CharArraySet protWords = new CharArraySet(new HashSet<>(Arrays.asList("NUTCH")), false);
+    final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
     Analyzer a = new Analyzer() {
@@ -275,7 +275,8 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
       @Override
       public TokenStreamComponents createComponents(String field) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        StopFilter filter = new StopFilter(tokenizer, StandardAnalyzer.STOP_WORDS_SET);
+        StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
+            tokenizer, StandardAnalyzer.STOP_WORDS_SET);
         return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, filter, flags, protWords));
       }
     };
@@ -341,7 +342,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = random().nextInt(512);
     final CharArraySet protectedWords;
     if (random().nextBoolean()) {
-      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
@@ -366,7 +367,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = random().nextInt(512);
     final CharArraySet protectedWords;
     if (random().nextBoolean()) {
-      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
@@ -390,7 +391,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final int flags = i;
     final CharArraySet protectedWords;
     if (random.nextBoolean()) {
-      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
+      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("a", "b", "cd")), false);
     } else {
       protectedWords = null;
     }
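All of these tests push filters through the same TokenStream lifecycle: reset(), a loop over incrementToken(), then end() and close(). A minimal consumption loop wired to the word-delimiter filter exercised above (4.x API; the flags and input are illustrative, and null means no protected words, as in the randomized tests):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ConsumeSketch {
      public static void dump() throws IOException {
        WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("Wi-Fi SD500"));
        TokenStream ts = new WordDelimiterFilter(Version.LUCENE_4_9, tok,
            WordDelimiterFilter.GENERATE_WORD_PARTS, null);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                        // required before the first incrementToken()
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // Wi, Fi, SD
        }
        ts.end();                          // finalizes offset/position state
        ts.close();
      }
    }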
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
index 914f5230570..062bfc16632 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
@@ -160,7 +160,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
   }
 
   public void testReset() throws Exception {
-    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
     tokenizer.setReader(new StringReader("abcde"));
     EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 3);
     assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{5,5,5});
@@ -200,7 +200,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
   }
 
   public void testGraphs() throws IOException {
-    TokenStream tk = new LetterTokenizer();
+    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT);
     ((Tokenizer)tk).setReader(new StringReader("abc d efgh ij klmno p q"));
     tk = new ShingleFilter(tk);
     tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
index 22ad2318651..5dfc9f4df94 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
@@ -113,7 +113,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
   }
 
   public void testReset() throws Exception {
-    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
     tokenizer.setReader(new StringReader("abcde"));
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
     assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,0,0,0,0}, new int[]{5,5,5,5,5}, new int[]{1,0,0,0,0});
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
index 939e8d3999b..1f554b44e0b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
@@ -114,14 +114,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
   }
 
   public void testSnowballCorrectness() throws Exception {
-    Analyzer a = new DutchAnalyzer();
+    Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
     checkOneTerm(a, "opheffen", "opheff");
     checkOneTerm(a, "opheffende", "opheff");
     checkOneTerm(a, "opheffing", "opheff");
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new DutchAnalyzer();
+    Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
     checkOneTerm(a, "lichaamsziek", "lichaamsziek");
     checkOneTerm(a, "lichamelijk", "licham");
     checkOneTerm(a, "lichamelijke", "licham");
@@ -129,12 +129,12 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
   }
 
   public void testExclusionTableViaCtor() throws IOException {
-    CharArraySet set = new CharArraySet( 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("lichamelijk");
-    DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
+    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
 
-    a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
+    a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
   }
 
@@ -144,12 +144,12 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    * even if you use a non-default ctor.
    */
   public void testStemOverrides() throws IOException {
-    DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET);
+    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
     checkOneTerm(a, "fiets", "fiets");
   }
 
   public void testEmptyStemDictionary() throws IOException {
-    DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET,
+    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET,
         CharArraySet.EMPTY_SET, CharArrayMap.<String>emptyMap());
     checkOneTerm(a, "fiets", "fiet");
   }
@@ -158,17 +158,17 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    * Test that stopwords are not case sensitive
    */
   public void testStopwordsCasing() throws IOException {
-    DutchAnalyzer a = new DutchAnalyzer();
+    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "Zelf", new String[] { });
   }
 
   private void check(final String input, final String expected) throws Exception {
-    checkOneTerm(new DutchAnalyzer(), input, expected);
+    checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new DutchAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new DutchAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
  }
 }
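Edge n-grams index the leading prefixes of each token, the usual building block for prefix and autocomplete matching. The filter above in isolation, as a sketch (4.x API; the Version constant and input are illustrative):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
    import org.apache.lucene.util.Version;

    public class EdgeNGramSketch {
      public static TokenStream build() {
        WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("abcde"));
        // minGram=1, maxGram=3 emits "a", "ab", "abc" for the token "abcde"
        return new EdgeNGramTokenFilter(Version.LUCENE_4_9, tok, 1, 3);
      }
    }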
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
index f3900f87c90..98fb8f66964 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new NorwegianAnalyzer();
+    new NorwegianAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new NorwegianAnalyzer();
+    Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "havnedistriktene", "havnedistrikt");
     checkOneTerm(a, "havnedistrikter", "havnedistrikt");
@@ -42,8 +42,8 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet( asSet("havnedistriktene"), false);
-    Analyzer a = new NorwegianAnalyzer(
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false);
+    Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
         NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "havnedistriktene", "havnedistriktene");
     checkOneTerm(a, "havnedistrikter", "havnedistrikt");
@@ -51,6 +51,6 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new NorwegianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new NorwegianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
index 278577381f8..f5fa09f238e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
@@ -66,7 +66,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("sekretæren"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
index a0dbc8671d6..c04ad4f8c01 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
@@ -65,7 +65,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("sekretæren"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
     Analyzer a = new Analyzer() {
      @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
index 4c5cce54950..402cf5c3579 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
@@ -27,12 +27,12 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new PortugueseAnalyzer();
+    new PortugueseAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new PortugueseAnalyzer();
+    Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "quilométricas", "quilometric");
     checkOneTerm(a, "quilométricos", "quilometric");
@@ -42,8 +42,8 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
-    Analyzer a = new PortugueseAnalyzer(
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
+    Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
         PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "quilométricas", "quilométricas");
     checkOneTerm(a, "quilométricos", "quilometric");
@@ -51,6 +51,6 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new PortugueseAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new PortugueseAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
index d04980f3eb8..02a20de016c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
@@ -92,7 +92,7 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
index c7a4ebad289..585993380ea 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
@@ -66,7 +66,7 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
index 24421d1f12d..39a6c685bbc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
@@ -66,7 +66,7 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
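The testKeyword() methods above all use the keyword-marker pattern: SetKeywordMarkerFilter flags terms found in the set, and downstream stemmers skip flagged tokens. A sketch under the same 4.x assumptions (the words are taken from the tests, the wiring is illustrative):

    import java.io.StringReader;
    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
    import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class KeywordMarkerSketch {
      public static TokenStream build() {
        CharArraySet protect =
            new CharArraySet(Version.LUCENE_4_9, Arrays.asList("quilométricas"), false);
        WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("quilométricas quilométricos"));
        // the protected term passes through unchanged; the other is stemmed
        return new PortugueseLightStemFilter(new SetKeywordMarkerFilter(tok, protect));
      }
    }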
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
index 5322a3e561e..b0a6c24e69b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
@@ -64,7 +64,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   public void testNoStopwords() throws Exception {
     // Note: an empty list of fields passed in
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Collections.<String>emptyList(), 1);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.<String>emptyList(), 1);
     TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "quick");
     assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
 
@@ -73,13 +73,13 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   }
 
   public void testDefaultStopwordsAllFields() throws Exception {
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);
     TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
   }
 
   public void testStopwordsAllFieldsMaxPercentDocs() throws Exception {
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, 1f / 2f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);
     TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     // A filter on terms in > one half of docs remove boring
@@ -89,36 +89,36 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
     // A filter on terms in > half of docs should not remove vaguelyBoring
     assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});
 
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, 1f / 4f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
     protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring");
     // A filter on terms in > quarter of docs should remove vaguelyBoring
     assertTokenStreamContents(protectedTokenStream, new String[0]);
   }
 
   public void testStopwordsPerFieldMaxPercentDocs() throws Exception {
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f);
     TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     // A filter on one Field should not affect queries on another
     assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
 
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f);
     protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     // A filter on the right Field should affect queries on it
     assertTokenStreamContents(protectedTokenStream, new String[0]);
   }
 
   public void testStopwordsPerFieldMaxDocFreq() throws Exception {
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
     int numStopWords = protectedAnalyzer.getStopWords("repetitiveField").length;
     assertTrue("Should have identified stop words", numStopWords > 0);
 
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField", "variedField"), 10);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField", "variedField"), 10);
     int numNewStopWords = protectedAnalyzer.getStopWords("repetitiveField").length + protectedAnalyzer.getStopWords("variedField").length;
     assertTrue("Should have identified more stop words", numNewStopWords > numStopWords);
   }
 
   public void testNoFieldNamePollution() throws Exception {
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer( appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
     TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring");
     // Check filter set up OK
@@ -131,6 +131,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
 
   public void testTokenStream() throws Exception {
     QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
+        TEST_VERSION_CURRENT,
         new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), reader, 10);
     TokenStream ts = a.tokenStream("repetitiveField", "this boring");
     assertTokenStreamContents(ts, new String[] { "this" });
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
index 750db7d1c62..ac7d225cc7c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
@@ -33,63 +33,63 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
   public void testFilter() throws Exception {
     TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); // 1-4 length string
     ((Tokenizer)stream).setReader(new StringReader("Do have a nice day"));
-    ReverseStringFilter filter = new ReverseStringFilter(stream);
+    ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
     assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
   }
 
   public void testFilterWithMark() throws Exception {
     TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); // 1-4 length string
     ((Tokenizer)stream).setReader(new StringReader("Do have a nice day"));
-    ReverseStringFilter filter = new ReverseStringFilter(stream, '\u0001');
+    ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
     assertTokenStreamContents(filter,
         new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });
   }
 
   public void testReverseString() throws Exception {
-    assertEquals( "A", ReverseStringFilter.reverse( "A" ) );
-    assertEquals( "BA", ReverseStringFilter.reverse( "AB" ) );
-    assertEquals( "CBA", ReverseStringFilter.reverse( "ABC" ) );
+    assertEquals( "A", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "A" ) );
+    assertEquals( "BA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "AB" ) );
+    assertEquals( "CBA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "ABC" ) );
   }
 
   public void testReverseChar() throws Exception {
     char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' };
-    ReverseStringFilter.reverse( buffer, 2, 3 );
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 2, 3 );
     assertEquals( "ABEDCF", new String( buffer ) );
   }
 
   public void testReverseSupplementary() throws Exception {
     // supplementary at end
-    assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse("瀛愯䇹鍟艱𩬅"));
+    assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅"));
     // supplementary at end - 1
-    assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse("瀛愯䇹鍟艱𩬅a"));
+    assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅a"));
     // supplementary at start
-    assertEquals("fedcba𩬅", ReverseStringFilter.reverse("𩬅abcdef"));
+    assertEquals("fedcba𩬅", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "𩬅abcdef"));
     // supplementary at start + 1
-    assertEquals("fedcba𩬅z", ReverseStringFilter.reverse("z𩬅abcdef"));
+    assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "z𩬅abcdef"));
     // supplementary medial
-    assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse("abcd𩬅efg"));
+    assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "abcd𩬅efg"));
   }
 
   public void testReverseSupplementaryChar() throws Exception {
     // supplementary at end
     char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray();
-    ReverseStringFilter.reverse(buffer, 3, 7);
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
     assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer));
     // supplementary at end - 1
     buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray();
-    ReverseStringFilter.reverse(buffer, 3, 8);
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
     assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer));
     // supplementary at start
     buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray();
-    ReverseStringFilter.reverse(buffer, 3, 7);
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
     assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer));
     // supplementary at start + 1
     buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray();
-    ReverseStringFilter.reverse(buffer, 3, 8);
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
     assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer));
     // supplementary medial
     buffer = "abc瀛愯𩬅def".toCharArray();
-    ReverseStringFilter.reverse(buffer, 3, 7);
+    ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
     assertEquals("abcfed𩬅愯瀛", new String(buffer));
   }
 
@@ -99,7 +99,7 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        return new TokenStreamComponents(tokenizer, new ReverseStringFilter(tokenizer));
+        return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
@@ -110,7 +110,7 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer tokenizer = new KeywordTokenizer();
-        return new TokenStreamComponents(tokenizer, new ReverseStringFilter(tokenizer));
+        return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
       }
     };
     checkOneTerm(a, "", "");
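The supplementary-character tests above pin down the interesting property of ReverseStringFilter: it reverses per code point, so a surrogate pair is never split into two unpaired halves. Directly, as a sketch (4.x API; the string is illustrative):

    import org.apache.lucene.analysis.reverse.ReverseStringFilter;
    import org.apache.lucene.util.Version;

    public class ReverseSketch {
      public static void main(String[] args) {
        // "ab" followed by U+29B05, encoded as the surrogate pair \uD866\uDF05
        String reversed = ReverseStringFilter.reverse(Version.LUCENE_4_9, "ab\uD866\uDF05");
        // reversed == "\uD866\uDF05ba": the pair moves as one unit rather
        // than being split into two lone surrogates
        System.out.println(reversed);
      }
    }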
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
index 7af63248732..03c96d096ff 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
@@ -27,12 +27,12 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new RomanianAnalyzer();
+    new RomanianAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new RomanianAnalyzer();
+    Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "absenţa", "absenţ");
     checkOneTerm(a, "absenţi", "absenţ");
@@ -42,8 +42,8 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet( asSet("absenţa"), false);
-    Analyzer a = new RomanianAnalyzer(
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false);
+    Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
         RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "absenţa", "absenţa");
     checkOneTerm(a, "absenţi", "absenţ");
@@ -51,6 +51,6 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new RomanianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new RomanianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
index f59a5f8a9f0..fbc683675e1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
@@ -33,12 +33,12 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
 
   /** Check that RussianAnalyzer doesnt discard any numbers */
   public void testDigitsInRussianCharset() throws IOException {
-    RussianAnalyzer ra = new RussianAnalyzer();
+    RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new RussianAnalyzer();
+    Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
         new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
     assertAnalyzesTo(a, "Но знание это хранилось в тайне",
@@ -47,9 +47,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
 
 
   public void testWithStemExclusionSet() throws Exception {
-    CharArraySet set = new CharArraySet( 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("представление");
-    Analyzer a = new RussianAnalyzer( RussianAnalyzer.getDefaultStopSet() , set);
+    Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
     assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
         new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
 
@@ -57,6 +57,6 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new RussianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new RussianAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
index 1d4a381e9d0..9433c9599ab 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
@@ -49,7 +49,7 @@ public class TestRussianLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("энергии"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("энергии"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index 514ad9a3095..bf747bdd8dc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -309,9 +309,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
     Analyzer delegate = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
-        CharArraySet stopSet = StopFilter.makeStopSet("into");
+        CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "into");
         Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-        TokenFilter filter = new StopFilter(tokenizer, stopSet);
+        TokenFilter filter = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet);
         return new TokenStreamComponents(tokenizer, filter);
       }
     };
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
index 83bca7dcc81..d407277dd27 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
@@ -981,7 +981,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
   }
 
   public void testReset() throws Exception {
-    Tokenizer wsTokenizer = new WhitespaceTokenizer();
+    Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
     wsTokenizer.setReader(new StringReader("please divide this sentence"));
     TokenStream filter = new ShingleFilter(wsTokenizer, 2);
     assertTokenStreamContents(filter,
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
index eeb81a46508..addf2d8b20c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
@@ -165,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
 
     source1.reset();
-    TokenStream lowerCasing = new LowerCaseFilter(source1);
+    TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
     String[] lowerCaseTokens = new String[tokens1.length];
     for (int i = 0; i < tokens1.length; i++)
       lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
@@ -173,7 +173,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
   }
 
   private StandardTokenizer standardTokenizer(StringBuilder builder) throws IOException {
-    StandardTokenizer tokenizer = new StandardTokenizer();
+    StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT);
     tokenizer.setReader(new StringReader(builder.toString()));
     return tokenizer;
   }
@@ -191,10 +191,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
     }
     //make sure we produce the same tokens
-    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
+    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)));
     TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
     teeStream.consumeAllTokens();
-    TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100);
+    TokenStream stream = new ModuloTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)), 100);
     CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
     CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
     for (int i=0; stream.incrementToken(); i++) {
@@ -207,12 +207,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       int tfPos = 0;
       long start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        stream = new StandardFilter(standardTokenizer(buffer));
+        stream = new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer));
         PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
           tfPos += posIncrAtt.getPositionIncrement();
         }
-        stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), modCounts[j]);
+        stream = new ModuloTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)), modCounts[j]);
         posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
           tfPos += posIncrAtt.getPositionIncrement();
@@ -224,7 +224,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       //simulate one field with one sink
       start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        teeStream = new TeeSinkTokenFilter(new StandardFilter( standardTokenizer(buffer)));
+        teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, standardTokenizer(buffer)));
         sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
         PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
         while (teeStream.incrementToken()) {
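TeeSinkTokenFilter lets one tokenization pass feed several consumers, which is what the timing loop above compares against re-analyzing the field. A sketch of the plumbing, assuming the 4.x API (input is illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
    import org.apache.lucene.analysis.standard.StandardFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public class TeeSinkSketch {
      public static void run() throws IOException {
        StandardTokenizer tok = new StandardTokenizer(Version.LUCENE_4_9);
        tok.setReader(new StringReader("one two three"));
        TeeSinkTokenFilter tee =
            new TeeSinkTokenFilter(new StandardFilter(Version.LUCENE_4_9, tok));
        TokenStream sink1 = tee.newSinkTokenStream(); // replays the source tokens
        TokenStream sink2 = tee.newSinkTokenStream();
        tee.consumeAllTokens(); // drive the source once; both sinks can now be read
      }
    }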
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java @@ -27,12 +27,12 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with NPE when the * stopwords file is missing in classpath */ public void testResourcesAvailable() { - new SwedishAnalyzer(); + new SwedishAnalyzer(TEST_VERSION_CURRENT); } /** test stopwords and stemming */ public void testBasics() throws IOException { - Analyzer a = new SwedishAnalyzer(); + Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT); // stemming checkOneTerm(a, "jaktkarlarne", "jaktkarl"); checkOneTerm(a, "jaktkarlens", "jaktkarl"); @@ -42,8 +42,8 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** test use of exclusion set */ public void testExclude() throws IOException { - CharArraySet exclusionSet = new CharArraySet( asSet("jaktkarlarne"), false); - Analyzer a = new SwedishAnalyzer( + CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false); + Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT, SwedishAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTerm(a, "jaktkarlarne", "jaktkarlarne"); checkOneTerm(a, "jaktkarlens", "jaktkarl"); @@ -51,6 +51,6 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** blast some random strings through the analyzer */ public void testRandomStrings() throws Exception { - checkRandomData(random(), new SwedishAnalyzer(), 1000*RANDOM_MULTIPLIER); + checkRandomData(random(), new SwedishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java index ef87a8e3a67..c25ade6b6a9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java @@ -49,7 +49,7 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase { } public void testKeyword() throws IOException { - final CharArraySet exclusionSet = new CharArraySet( asSet("jaktkarlens"), false); + final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java index 36897b7c5bd..59d85c26615 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java @@ -100,7 +100,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase { @Test(expected=ParseException.class) public void testInvalidPositionsInput() throws Exception { String testFile = "testola => the test"; - SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer()); + SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT)); parser.parse(new StringReader(testFile)); } @@ -108,7 +108,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase { @Test(expected=ParseException.class) public void testInvalidPositionsOutput() throws 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
index ef87a8e3a67..c25ade6b6a9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
@@ -49,7 +49,7 @@ public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet( asSet("jaktkarlens"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
index 36897b7c5bd..59d85c26615 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
@@ -100,7 +100,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase {
   @Test(expected=ParseException.class)
   public void testInvalidPositionsInput() throws Exception {
     String testFile = "testola => the test";
-    SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer());
+    SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
     parser.parse(new StringReader(testFile));
   }
 
@@ -108,7 +108,7 @@ public class TestSolrSynonymParser extends BaseTokenStreamTestCase {
   @Test(expected=ParseException.class)
   public void testInvalidPositionsOutput() throws Exception {
     String testFile = "the test => testola";
-    SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer());
+    SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
     parser.parse(new StringReader(testFile));
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
index 48c53a8c33c..85bd371e73b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
@@ -45,7 +45,7 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
 
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(asSet("ağacı"), false);
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false);
     Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
         TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "ağacı", "ağacı");
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
index fdc830fb2f5..9c137c21372 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
@@ -25,7 +25,7 @@ import org.apache.lucene.util.LuceneTestCase;
 
 public class TestCharArrayMap extends LuceneTestCase {
   public void doRandom(int iter, boolean ignoreCase) {
-    CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
+    CharArrayMap<Integer> map = new CharArrayMap<>(TEST_VERSION_CURRENT, 1, ignoreCase);
     HashMap<String,Integer> hmap = new HashMap<>();
 
     char[] key;
@@ -64,7 +64,7 @@ public class TestCharArrayMap extends LuceneTestCase {
   }
 
   public void testMethods() {
-    CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
+    CharArrayMap<Integer> cm = new CharArrayMap<>(TEST_VERSION_CURRENT, 2, false);
     HashMap<String,Integer> hm = new HashMap<>();
     hm.put("foo",1);
     hm.put("bar",2);
@@ -133,7 +133,7 @@ public class TestCharArrayMap extends LuceneTestCase {
   }
 
   public void testModifyOnUnmodifiable(){
-    CharArrayMap<Integer> map = new CharArrayMap<>(2, false);
+    CharArrayMap<Integer> map = new CharArrayMap<>(TEST_VERSION_CURRENT, 2, false);
     map.put("foo",1);
     map.put("bar",2);
     final int size = map.size();
@@ -230,7 +230,7 @@ public class TestCharArrayMap extends LuceneTestCase {
   }
 
   public void testToString() {
-    CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
+    CharArrayMap<Integer> cm = new CharArrayMap<>(TEST_VERSION_CURRENT, Collections.singletonMap("test",1), false);
     assertEquals("[test]",cm.keySet().toString());
     assertEquals("[1]",cm.values().toString());
     assertEquals("[test=1]",cm.entrySet().toString());
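CharArrayMap is the char[]-keyed map these tests exercise; the restored version argument is again the first constructor parameter. A small sketch, not from the patch (Version.LUCENE_4_9 is a placeholder constant):

    import org.apache.lucene.analysis.util.CharArrayMap;
    import org.apache.lucene.util.Version;

    public class CharArrayMapDemo {
      public static void main(String[] args) {
        CharArrayMap<Integer> map = new CharArrayMap<>(Version.LUCENE_4_9, 16, true); // ignoreCase = true
        map.put("Foo", 1);
        // Lookups work directly against char[] slices without allocating Strings,
        // which is why the analysis code uses this map for stopword-like tables.
        char[] buffer = "xxfooxx".toCharArray();
        System.out.println(map.get(buffer, 2, 3)); // prints 1: case-insensitive match on "foo"
      }
    }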
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
index cb31b3e2ccf..9af7447588e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
@@ -35,7 +35,7 @@ public class TestCharArraySet extends LuceneTestCase {
 
   public void testRehash() throws Exception {
-    CharArraySet cas = new CharArraySet(0, true);
+    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
     for(int i=0;i<TEST_STOP_WORDS.length;i++)
       cas.add(TEST_STOP_WORDS[i]);
 [...]
     // ... would not hit any element of the CAS and therefor never call
     // remove() on the iterator
     try{
-      set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true));
+      set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
       fail("Modified unmodifiable set");
     }catch (UnsupportedOperationException e) {
       // expected
@@ -152,7 +152,7 @@ public class TestCharArraySet extends LuceneTestCase {
     }
 
     try{
-      set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true));
+      set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true));
       fail("Modified unmodifiable set");
     }catch (UnsupportedOperationException e) {
       // expected
@@ -173,7 +173,7 @@ public class TestCharArraySet extends LuceneTestCase {
   }
 
   public void testUnmodifiableSet(){
-    CharArraySet set = new CharArraySet(10,true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     set.add(Integer.valueOf(1));
     final int size = set.size();
@@ -203,7 +203,7 @@ public class TestCharArraySet extends LuceneTestCase {
         "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
     String[] lowerArr = new String[] {"abc\ud801\udc44",
         "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
-    CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true);
     for (String upper : upperArr) {
       set.add(upper);
     }
@@ -211,7 +211,7 @@ public class TestCharArraySet extends LuceneTestCase {
       assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
       assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
     }
-    set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
+    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
     for (String upper : upperArr) {
       set.add(upper);
     }
@@ -229,7 +229,7 @@ public class TestCharArraySet extends LuceneTestCase {
     String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
         "\uD800efg", "\uD800\ud801\udc44b" };
-    CharArraySet set = new CharArraySet(Arrays
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays
         .asList(TEST_STOP_WORDS), true);
     for (String upper : upperArr) {
       set.add(upper);
@@ -238,7 +238,7 @@ public class TestCharArraySet extends LuceneTestCase {
     }
-    set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS),
+    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
         false);
     for (String upper : upperArr) {
       set.add(upper);
@@ -252,8 +252,8 @@ public class TestCharArraySet extends LuceneTestCase {
 
   @SuppressWarnings("deprecated")
   public void testCopyCharArraySetBWCompat() {
-    CharArraySet setIngoreCase = new CharArraySet(10, true);
-    CharArraySet setCaseSensitive = new CharArraySet(10, false);
+    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
 
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<>();
@@ -265,8 +265,8 @@ public class TestCharArraySet extends LuceneTestCase {
     setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
     setCaseSensitive.add(Integer.valueOf(1));
 
-    CharArraySet copy = CharArraySet.copy(setIngoreCase);
-    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
+    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
+    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);
 
     assertEquals(setIngoreCase.size(), copy.size());
     assertEquals(setCaseSensitive.size(), copy.size());
@@ -299,8 +299,8 @@ public class TestCharArraySet extends LuceneTestCase {
    * Test the static #copy() function with a CharArraySet as a source
    */
   public void testCopyCharArraySet() {
-    CharArraySet setIngoreCase = new CharArraySet(10, true);
-    CharArraySet setCaseSensitive = new CharArraySet(10, false);
+    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
 
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<>();
@@ -312,8 +312,8 @@ public class TestCharArraySet extends LuceneTestCase {
     setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
     setCaseSensitive.add(Integer.valueOf(1));
 
-    CharArraySet copy = CharArraySet.copy(setIngoreCase);
-    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
+    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
+    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);
 
     assertEquals(setIngoreCase.size(), copy.size());
     assertEquals(setCaseSensitive.size(), copy.size());
@@ -355,7 +355,7 @@ public class TestCharArraySet extends LuceneTestCase {
     }
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
 
-    CharArraySet copy = CharArraySet.copy(set);
+    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);
 
     assertEquals(set.size(), copy.size());
     assertEquals(set.size(), copy.size());
@@ -380,12 +380,12 @@ public class TestCharArraySet extends LuceneTestCase {
   }
 
   /**
-   * Tests a special case of {@link CharArraySet#copy(Set)} where the
+   * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the
    * set to copy is the {@link CharArraySet#EMPTY_SET}
    */
   public void testCopyEmptySet() {
     assertSame(CharArraySet.EMPTY_SET,
-        CharArraySet.copy(CharArraySet.EMPTY_SET));
+        CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET));
   }
 
   /**
@@ -408,7 +408,7 @@ public class TestCharArraySet extends LuceneTestCase {
    * Test for NPE
    */
   public void testContainsWithNull() {
-    CharArraySet set = new CharArraySet(1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     try {
       set.contains((char[]) null, 0, 10);
       fail("null value must raise NPE");
@@ -424,7 +424,7 @@ public class TestCharArraySet extends LuceneTestCase {
   }
 
   public void testToString() {
-    CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
+    CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test"));
     assertEquals("[test]", set.toString());
     set.add("test2");
     assertTrue(set.toString().contains(", "));
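The copy(Version, Set) tests above rely on copy() preserving the source set's ignoreCase setting. A short sketch of that behavior together with unmodifiableSet(), not from the patch (Version.LUCENE_4_9 is a placeholder constant):

    import java.util.Arrays;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class CharArraySetDemo {
      public static void main(String[] args) {
        Version matchVersion = Version.LUCENE_4_9; // placeholder
        CharArraySet stopwords = new CharArraySet(matchVersion, Arrays.asList("the", "a", "an"), true);
        // copy(Version, Set) materializes an independent CharArraySet; the original can keep changing.
        CharArraySet frozen = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
        System.out.println(frozen.contains("THE")); // true: ignoreCase was preserved by copy()
        // frozen.add("new-word");                  // would throw UnsupportedOperationException
      }
    }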
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
index 0ed68dab40b..a470c9fefaa 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
@@ -52,7 +52,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString()));
     assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
   }
@@ -70,7 +70,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("a");
     }
     builder.append("\ud801\udc1cabc");
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString()));
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
   }
@@ -85,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     for (int i = 0; i < 255; i++) {
       builder.append("A");
     }
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
@@ -100,7 +100,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("A");
     }
     builder.append("\ud801\udc1c");
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
@@ -110,7 +110,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
+        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) {
           @Override
           protected int normalize(int c) {
             if (c > 0xffff) {
@@ -148,7 +148,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
+        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) {
           @Override
           protected int normalize(int c) {
             if (c <= 0xffff) {
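These tokenizer tests construct readerless tokenizers and attach input via setReader() afterwards. A standalone sketch of the same pattern, assuming the plain LowerCaseTokenizer(Version) constructor also restored by this patch (the tests above use the (Version, AttributeFactory) variant; Version.LUCENE_4_9 is a placeholder):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class LowerCaseTokenizerDemo {
      public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_4_9); // placeholder version constant
        tokenizer.setReader(new StringReader("Hello WORLD"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
          System.out.println(term); // "hello", then "world"
        }
        tokenizer.end();
        tokenizer.close();
      }
    }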
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
index b39b4cda73b..f31f913a332 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
@@ -46,7 +46,7 @@ public class TestCharacterUtils extends LuceneTestCase {
     } catch (IndexOutOfBoundsException e) {
     }
 
-    CharacterUtils java5 = CharacterUtils.getInstance();
+    CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     assertEquals((int) 'A', java5.codePointAt(cpAt3, 0));
     assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt(
         cpAt3, 3));
@@ -68,7 +68,7 @@ public class TestCharacterUtils extends LuceneTestCase {
     assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3, 5));
     assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3, 4));
 
-    CharacterUtils java5 = CharacterUtils.getInstance();
+    CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     assertEquals((int) 'A', java5.codePointAt(cpAt3, 0, 2));
     assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt(
         cpAt3, 3, 5));
@@ -78,7 +78,7 @@ public class TestCharacterUtils extends LuceneTestCase {
   @Test
   public void testCodePointCount() {
     CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
+    CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     final String s = TestUtil.randomUnicodeString(random());
     assertEquals(s.length(), java4.codePointCount(s));
     assertEquals(Character.codePointCount(s, 0, s.length()), java5.codePointCount(s));
@@ -87,7 +87,7 @@ public class TestCharacterUtils extends LuceneTestCase {
   @Test
   public void testOffsetByCodePoint() {
     CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
+    CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     for (int i = 0; i < 10; ++i) {
       final char[] s = TestUtil.randomUnicodeString(random()).toCharArray();
       final int index = TestUtil.nextInt(random(), 0, s.length);
@@ -119,7 +119,7 @@ public class TestCharacterUtils extends LuceneTestCase {
 
   public void testConversions() {
     CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
+    CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     testConversions(java4);
     testConversions(java5);
   }
@@ -159,7 +159,7 @@ public class TestCharacterUtils extends LuceneTestCase {
   @Test
   public void testFillNoHighSurrogate() throws IOException {
     CharacterUtils versions[] = new CharacterUtils[] {
-        CharacterUtils.getInstance(),
+        CharacterUtils.getInstance(TEST_VERSION_CURRENT),
         CharacterUtils.getJava4Instance() };
     for (CharacterUtils instance : versions) {
       Reader reader = new StringReader("helloworld");
@@ -181,7 +181,7 @@ public class TestCharacterUtils extends LuceneTestCase {
   @Test
   public void testFillJava15() throws IOException {
     String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
-    CharacterUtils instance = CharacterUtils.getInstance();
+    CharacterUtils instance = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
     Reader reader = new StringReader(input);
     CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
     assertTrue(instance.fill(buffer, reader));
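The version argument to CharacterUtils.getInstance() is what selects between the "Java 4" (per-char) and "Java 5" (real code point) implementations these tests compare. A minimal sketch of the difference, not from the patch (Version.LUCENE_4_9 is a placeholder and selects the code-point-aware instance):

    import org.apache.lucene.analysis.util.CharacterUtils;
    import org.apache.lucene.util.Version;

    public class CharacterUtilsDemo {
      public static void main(String[] args) {
        String s = "a\ud801\udc1cb"; // 4 chars, 3 code points (one surrogate pair)
        CharacterUtils java5 = CharacterUtils.getInstance(Version.LUCENE_4_9); // placeholder
        CharacterUtils java4 = CharacterUtils.getJava4Instance();
        System.out.println(java5.codePointCount(s)); // 3: surrogate pair counted once
        System.out.println(java4.codePointCount(s)); // 4: every char counted
      }
    }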
M'enfin.";
-    Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
     tokenizer.setReader(new StringReader(test));
-    CharArraySet articles = new CharArraySet(asSet("l", "M"), false);
+    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
     TokenFilter filter = new ElisionFilter(tokenizer, articles);
     List<String> tas = filter(filter);
     assertEquals("embrouille", tas.get(4));
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
index 2c5f9084b07..24515512cd5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
@@ -50,7 +50,8 @@ public class TestFilesystemResourceLoader extends LuceneTestCase {
   private void assertClasspathDelegation(ResourceLoader rl) throws Exception {
     // try a stopwords file from classpath
     CharArraySet set = WordlistLoader.getSnowballWordSet(
-      new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8)
+      new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8),
+      TEST_VERSION_CURRENT
     );
     assertTrue(set.contains("you"));
     // try to load a class; we use string comparison because classloader may be different...
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
index bd2ebdba71f..ac33b30dc8d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
@@ -29,15 +29,15 @@ public class TestWordlistLoader extends LuceneTestCase {
 
   public void testWordlistLoading() throws IOException {
     String s = "ONE\n two \nthree";
-    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
+    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT);
     checkSet(wordSet1);
-    CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
+    CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)), TEST_VERSION_CURRENT);
     checkSet(wordSet2);
   }
 
   public void testComments() throws Exception {
     String s = "ONE\n two \nthree\n#comment";
-    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
+    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#", TEST_VERSION_CURRENT);
     checkSet(wordSet1);
     assertFalse(wordSet1.contains("#comment"));
     assertFalse(wordSet1.contains("comment"));
@@ -66,7 +66,7 @@ public class TestWordlistLoader extends LuceneTestCase {
       " two \n" + // stopword with leading/trailing space
       " three four five \n" + // multiple stopwords
       "six seven | comment\n"; //multiple stopwords + comment
-    CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
+    CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s), TEST_VERSION_CURRENT);
     assertEquals(7, wordset.size());
     assertTrue(wordset.contains("ONE"));
     assertTrue(wordset.contains("two"));
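WordlistLoader's readers-to-CharArraySet helpers take the version as their last parameter, as the hunks above show. A short usage sketch, not from the patch (Version.LUCENE_4_9 is a placeholder constant):

    import java.io.StringReader;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.analysis.util.WordlistLoader;
    import org.apache.lucene.util.Version;

    public class WordlistLoaderDemo {
      public static void main(String[] args) throws Exception {
        // One word per line; lines starting with the comment marker are skipped.
        String words = "foo\nbar\n#a comment";
        CharArraySet set = WordlistLoader.getWordSet(
            new StringReader(words), "#", Version.LUCENE_4_9); // placeholder version constant
        System.out.println(set.contains("bar"));        // true
        System.out.println(set.contains("#a comment")); // false: comment line was skipped
      }
    }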
diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
index a4e5c3c3847..55dd9466fe7 100644
--- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.icu.segmentation;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -42,7 +43,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false));
         TokenStream result = new CJKBigramFilter(source);
-        return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
+        return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET));
       }
     };
@@ -60,7 +61,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase {
         // some halfwidth katakana forms, which will affect the bigramming.
         TokenStream result = new ICUNormalizer2Filter(source);
         result = new CJKBigramFilter(source);
-        return new TokenStreamComponents(source, new StopFilter(result, CharArraySet.EMPTY_SET));
+        return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET));
       }
     };
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index fe3dc2e6bb6..f738e4a10b3 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ja;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -30,6 +31,7 @@ import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
 import org.apache.lucene.analysis.ja.dict.UserDictionary;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
 
 /**
  * Analyzer for Japanese that uses morphological analysis.
@@ -40,12 +42,12 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase {
   private final Set<String> stoptags;
   private final UserDictionary userDict;
 
-  public JapaneseAnalyzer() {
-    this(null, JapaneseTokenizer.DEFAULT_MODE, DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS);
+  public JapaneseAnalyzer(Version matchVersion) {
+    this(matchVersion, null, JapaneseTokenizer.DEFAULT_MODE, DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS);
   }
 
-  public JapaneseAnalyzer(UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags) {
-    super(stopwords);
+  public JapaneseAnalyzer(Version matchVersion, UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags) {
+    super(matchVersion, stopwords);
     this.userDict = userDict;
     this.mode = mode;
     this.stoptags = stoptags;
@@ -87,11 +89,11 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName) {
     Tokenizer tokenizer = new JapaneseTokenizer(userDict, true, mode);
     TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
-    stream = new JapanesePartOfSpeechStopFilter(stream, stoptags);
+    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
     stream = new CJKWidthFilter(stream);
-    stream = new StopFilter(stream, stopwords);
+    stream = new StopFilter(matchVersion, stream, stopwords);
     stream = new JapaneseKatakanaStemFilter(stream);
-    stream = new LowerCaseFilter(stream);
+    stream = new LowerCaseFilter(matchVersion, stream);
     return new TokenStreamComponents(tokenizer, stream);
   }
 }
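With both constructors restored, callers pick either the all-defaults form or the fully explicit one. A sketch mirroring the calls exercised by the tests below (not part of the patch; Version.LUCENE_4_9 is a placeholder constant):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
    import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
    import org.apache.lucene.util.Version;

    public class JapaneseAnalyzerDemo {
      public static void main(String[] args) {
        Version matchVersion = Version.LUCENE_4_9; // placeholder
        // Shortest form: DEFAULT_MODE plus the default stop set and stop tags.
        Analyzer simple = new JapaneseAnalyzer(matchVersion);
        // Fully spelled out, as in the tests: no user dictionary, SEARCH mode.
        Analyzer explicit = new JapaneseAnalyzer(matchVersion, null, Mode.SEARCH,
            JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags());
        System.out.println(simple.getClass() == explicit.getClass()); // same analyzer, different configuration
      }
    }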
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
index 11bb4e67e62..476a723abe1 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
@@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * Removes tokens that match a set of part-of-speech tags.
@@ -32,11 +33,12 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter {
 
   /**
    * Create a new {@link JapanesePartOfSpeechStopFilter}.
+   * @param version the Lucene match version
    * @param input the {@link TokenStream} to consume
    * @param stopTags the part-of-speech tags that should be removed
    */
-  public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) {
-    super(input);
+  public JapanesePartOfSpeechStopFilter(Version version, TokenStream input, Set<String> stopTags) {
+    super(version, input);
     this.stopTags = stopTags;
   }
 
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
index 3e9365ac3ac..18cc27a36da 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
@@ -70,7 +70,7 @@ public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory im
   public TokenStream create(TokenStream stream) {
     // if stoptags is null, it means the file is empty
     if (stopTags != null) {
-      final TokenStream filter = new JapanesePartOfSpeechStopFilter(stream, stopTags);
+      final TokenStream filter = new JapanesePartOfSpeechStopFilter(luceneMatchVersion, stream, stopTags);
       return filter;
    } else {
      return stream;
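A sketch of using the version-taking filter in a custom chain, following the shape of JapaneseAnalyzer.createComponents() above (not part of the patch; Version.LUCENE_4_9 and the sample stop tag are illustrative placeholders):

    import java.util.Collections;
    import java.util.Set;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
    import org.apache.lucene.analysis.ja.JapaneseTokenizer;
    import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
    import org.apache.lucene.util.Version;

    public class PartOfSpeechChainDemo {
      public static void main(String[] args) {
        final Version matchVersion = Version.LUCENE_4_9; // placeholder
        final Set<String> stopTags = Collections.singleton("助詞-格助詞-一般"); // sample IPADIC tag
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            // No user dictionary, punctuation discarded, SEARCH-mode decompounding.
            Tokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.SEARCH);
            TokenStream stream = new JapanesePartOfSpeechStopFilter(matchVersion, tokenizer, stopTags);
            return new TokenStreamComponents(tokenizer, stream);
          }
        };
      }
    }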
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java
index 90cc00e83e1..65e55aac50e 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java
@@ -33,7 +33,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new JapaneseAnalyzer();
+    new JapaneseAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /**
@@ -42,7 +42,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
    * and offsets are correct.
    */
   public void testBasics() throws IOException {
-    assertAnalyzesTo(new JapaneseAnalyzer(), "多くの学生が試験に落ちた。",
+    assertAnalyzesTo(new JapaneseAnalyzer(TEST_VERSION_CURRENT), "多くの学生が試験に落ちた。",
                      new String[] { "多く", "学生", "試験", "落ちる" },
                      new int[] { 0, 3, 6, 9 },
                      new int[] { 2, 5, 8, 11 },
@@ -55,7 +55,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
    */
   public void testDecomposition() throws IOException {
-    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(),
                                             JapaneseAnalyzer.getDefaultStopTags());
 
@@ -110,7 +110,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
     );
 
     // Kyoto University Baseball Club
-    assertAnalyzesToPositions(new JapaneseAnalyzer(), "京都大学硬式野球部",
+    assertAnalyzesToPositions(new JapaneseAnalyzer(TEST_VERSION_CURRENT), "京都大学硬式野球部",
                               new String[] { "京都大",
                                              "学",
                                              "硬式",
@@ -127,7 +127,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
    */
   public void testRandom() throws IOException {
     Random random = random();
-    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(),
                                             JapaneseAnalyzer.getDefaultStopTags());
     checkRandomData(random, a, atLeast(10000));
@@ -136,7 +136,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
   /** blast some random large strings through the analyzer */
   public void testRandomHugeStrings() throws Exception {
     Random random = random();
-    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(),
                                             JapaneseAnalyzer.getDefaultStopTags());
     checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 8192);
@@ -146,7 +146,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
   // user dict to analyzer works:
   public void testUserDict3() throws Exception {
     // Test entry that breaks into multiple tokens:
-    final Analyzer a = new JapaneseAnalyzer(TestJapaneseTokenizer.readDict(),
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, TestJapaneseTokenizer.readDict(),
                                             Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(),
                                             JapaneseAnalyzer.getDefaultStopTags());
@@ -163,7 +163,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
   public void testCuriousString() throws Exception {
     Random random = random();
     final String s = "<li>06:26 2004年3月21日 [[利用者:Kzhr|Kzhr]] "お菓子な家族" を削除しました <em><nowiki>(即時削除: 悪戯。内容: 
&#39;KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK&#39;)</nowiki></em></li>"; - final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -174,7 +174,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void testAnotherCuriousString() throws Exception { Random random = random(); final String s = "《〔〘〝」〩〄〯』〴〷〦〯〹】〰。 〆。〡〢〲〆〤〫〱 〜々〲〿〄》〃】〚〗〪〓〨々〮〹〟〯〫』」〨〒〜〃〃〡 〸〜〱〆〿「〱〳。〷〆〃〷〇〛〥〒〖〪〕〦〚〉〷〼〣〒。〕〣〻〒〻〼〔〸〿〖〖〆々〭《〟〚〇〕〸〲〄〿〙」〞〖〪〬〤【〵〘〃々〦〮〠〦〛〲〝〿〽〓〺〷〛》〛『》〇 〽〄〱〙〥〠』〨〉〨〔」》〮〥〽〔〰〄〶】〠〶〨〔々『。〞〙〮》【 〯〦〯〩〩〈〿〫〘〒》』〾〰〰〼〒「〝〰〱〞〹〔〪〭、〬〴【』〧〩】〈。〧〤〢〨〶〄〴〡。〪〭〞〷〣〘〳〄〬〙『 「」【〮〯〔〱〬〴〵〭〬〚〱、〚〣、〚〓〮、〚々】〼〿〦〫〛〲〆〕々。〨〩〇〫〵『『〣〮〜〫〃】〡〯』〆〫〺〻〬〺、〗】〓〕〶〇〞〬。」〃〮〇〞〷〰〲】〆〻。〬〻〄〜〃〲〺〧〘〇〈、〃〚〇〉「〬〣〨〮〆〴〻〒〖〄〒〳〗〶、〙「 〫〚《〩〆〱〡【〶』【〆〫】〢》〔。〵〴〽々〱〖〳〶〱《〈〒』『〝〘【〈〢〝〠〣「〤〆〢〈〚〕〿〣々〢〹〉〡 〷《〤〴『々〉〤〬《』々〾〔〚〆〔〴〪〩〸〦』〉〃 《〼〇〆〾〛〿」〧〝〽〘〠〻【〰〨〥《〯〝〩〩〱〇〳々〚〉〔『〹〳〳』〲『〣」〯〓【々〮〥〃〿〳〞〦〦〶〓〬〛〬〈〈〠『〜〥〒〯〜〜〹〲【〓〪《々〗〚〇〜〄〦『々〃〒〇〖〢〉〹〮〩〽『》〵〔】〣〮】〧、〇〰〒】《〈〆々〾〣【〾〲〘〧『〇〲〼〕〙「〪〆〚々〦〯〵〇〤〆〡」〪》〼』〴〶〪】『〲〢〭〬〈〠〮〽〓〔〧〖」〃〴〬』〣〝〯〣〴『〉〖〄〇〄〰〇〃〤、〤》〔〴〯〫〠〝〷〞〩〛〛〳々〓〟〜〛〜〃 〃〛「、』》》々〢〱〢〸〹〙〃〶〇〮〼」〔〶【〙〮々〣 〵〱〈〡〙〹、〶〘【〘〄〔『〸〵〫〱〈〙〜〸〩〗〷》〽〃〔〕〡〨〆〺〒〧〴〢〈〯〶〼〚〈〪〘〢〘〶〿〾〹〆〉」〠〴〭〉〡〮〫〸〸〦〟〣」〩〶』《〔〨〫〉〃〚〈〡〾〈〵【〼《〴〸〜〜〓《〡〶〫〉〫〼〱〿〢々〩〡〘〓〛〞〖々〢〩「々〦〣】〤〫〼〚〴〡〠〕〴〭。〟「〞》』「、〛〕〤々〈〺〃〸】〶〽〒〓〙》〶〬〸〧〜〲〬〰〪。〞〒【〭〇〢〝〧〰〹〾》〖「〹」〶〕〜〘〿〩〙〺〡〓〆〵〪〬〨〷〯〃】〤〤〞〸》〈〹〖〲〣〬〲〯〗〉〮「〼〨〓々。〭〆〶〩【〦〿》〩〻〢〔〤〟〯【〷〻〚〟」〗《〓〛。〰〃〭〯〘〣》〩〩〆」【〼〡】〳〿〫〳〼〺〶『〟〧』〳〲〔『〦「〳〃〫〷《〟〶〻〪〆〗〲〮〄〨〻』〟〜〓〣〴〓〉、〷〄〝〭〻〲〽〼〥〒〚〬〙〦〓〢〦〒〄。〛〩〿〹「〶〬〖〬〾〭〽〕〲〤〕〚〢〪〸〠〸〠〓〇〄〽〖】〵〮〦〲〸〉〫〢〹〼〗〱〮〢」〝〽〹「〭〥「〠〆〕〃〫々【『〣〝々〧〒〒】〬〖〘〗〰〭〢〠〨〖〶〒》〪〺〇〡》〦〝〾〴〸〓〛〟〞」〓〜。〡』々》〃〼』〨〾】〜〵々〥【〉〾〭〹〯〔〢〺〳〹〜〢〄〵〵〱。〯〹〺〣〭〉〛々〧〫々〛〪。〠〰〖〒〦〠〩〣〾〺〫〬、》「〚〫〲〸〶〧〞〯〨」】【〚〲『〽〡》〘〣〒〕〸『〼〘〿〘〽〤〿〶〫〆〾〔〃〱〫〱〧、 〒〰。〜〸〇〜〔〉〡〬〿〝〼〉〷、〠〘〉』〥〫〧〕》》〡〻〨〲〔〠〮】〰〮」〧〬《〦〼〽〵〭「〷〮〈〴〔〭、〣〔〥〱〔」〄〘》〡〣》〴〙〜〖〬〺〯々〟〗〥〥【〝〨〝〽〼〚。〙』〤〬〞〜〣〮〬〳〽〦〩 〶」〠〄〳〠〇〜〒〶〱々〠『〡』〭〰》〴〉〫〬〒《〽『〉〳〵〄〨〮〔〭〞』〡〚〩〦、〠【〓〯〬〦〛〽〉〜〻『〗〫〞〩〃〼〿〡〕〯 〸』》〼〮〆」〼〪〇〭〣〗〓〻〧」〙〳〱〥〳、〓〕〮〫》〧〃《〣』〹〬〣〶〡〾〙〮〕〶〧《 〨〇〺〳〉《《〕〜〰〱〕〛「〞〩 〓〢〄〣〼〢〽〇〛〟〖〘〳〤〫〡〫〬〦〘〪〶〝《』〜〕〝】〄〡〳〹々〯【〝〝〇〔〹〿〥〄〚〒〻『〺〮〇〲〒〾〙〞〉】〉〪〫〴〒〔〨〮〰〻〷〿〥〮〼〹〩〱〞〈〴〦〰〞【〻〾、〵〻〛〮、〻《〘〱〫〾〄〄〙、〔〖〘 》〻〧〦〃〣〬〰〗】〸〵。〄。〷〄〸〟〰〓、【〖〰〢〾〘〆》〜〶〻》〔〛』〦〩〷〴〃〴〫〱《「〖々〖〒〡〞。〱〡〖〤〫〇〜〒〴〯》〪〶》〘〨》》【〵〹」〤〯《〦〶〯〃〧〙〩」「〤】》々〣〱〯〞〰〢々〵〷〺〾〺〜〜〚〣〿〩〰《〄『〧《〜 〷〓〺〦々〚〨「〧〮《〥〸〞【〡〩〩〱〴〗〙〿『〇〭〖 
〹〥〲『〗〛〯〷〃〽〝《〳《〡】〄々〱〆〯〦。〒『〡》〨〃〦」〬〄〬〔〭〫〼〲】』〗〔〼〴〹〠」〺〬〺〔々》〾〿「〺〖〤》〴〶〣〚〒【〤〄】〹〺〟〃〜、】〪〚〯〢〹、〶〖〭】〾〠」〉〆〾々〯〈〙〞〶〩】〺〟〫〽〫〸〵〛〙〃「〰〫〓』〻「〦〤〖〺〇。〨〟 〦〙〘〨〸〒〣〈〩〜〧〾〒〕〤 〇〴〮〝〈〿〢〴〟〷〭〴】〽〇〟〦〬〶〲。〫〸〮〝〆〸〄〣〦〲〢〇〫〻〹〕〶〥〖。〨〬【〥〽〓〵〯、〒〉〳〘〧〼〆〹〉〾〬〽】〹〲《〜〨〟〡〪〱〃〓〬〜〧〝〸、〢〝〦』〝〸》】〩〡〉〫〛〇〢〖〔〠〹〧〕〨〃〙「〲〗〙『〛。。《〸〔〾〧〉〠』〡〼〄〨〲〥〼〠〻 「〸〩〟〷【〮〜〧〿〾〜〈。〣〰〪〘〮〴〨【〩〜〟〟〼〻〦〝「〺〝〄〵〝〲〃〨〺〫〜 〮」〡』〜〿、〪々〕〫〃〒〔〛〻〲〹》【〚〣〯《〢〙〕〝〾〙〭〄〕〗〄〪〵〃〘〺〻〤〟〢〻〆〥〝】〠〬〧〾〮々〪〓』〷』〿〕〒〽、〷〉」〨〨 〄〽〾「〧〴〜〢〮〚〆〣《〬〺〟〥〼〛〆〓〚々〇々〈〉〗〨〳々〣〭〯〉【〩〮〺〪』〭〚〉〦、〃〘〦〮」〴〆〴〔〴〜「〠〴【〰「〫〳〟〾〶〉〨〲〚〩〷〄《〄〝〈。〧〟〳〃〹々〃〄〭〬〰、〥〬〸〱〉〩〴《〔【〠〳〪〧〫〽〓〭】〧」〮〒〸〤。〩』〭〖〛〭〯〨〕〞〮〞〬〹〺々〽〡〷〪〶「〹〯〝々〭〠〼〰「〒〉」」〡〆〜〾〪〾》〇〙『〚〿〽】〛〮〶〚」《〔〔〣、〄〗〩〭〠」〠〰〞《〸〧〺〰」『〾〯〃〓〓〩〣〚【〜〭 〝〨〗〷〒《〫〝〶〘〣〿〜〱〾〨〥〘〃〳〆〇〈〜〲〪〡〶〭〤『〝〖〷〦〾〬〟〠〳〻、」【〣『〺〞〴〳「〵〺〨々〩〰〢〧〣〃『〹」〉〓〘〦〣〄〕〞〵〧〜」〴〠〱》〮〬〄〶〆〬」〘 〺。〲。 〾〷〕〛〣〾〗、〭』〭〧〝。〮」々『〻〒〣「〳〩〪〝〒〥〻〘〰〼〭〆〷〭「」〚〔〬〃〝〮〩〪〽〱。〯〯〰〨〿〷「々》【〴〧〻〰、〶〡〹〩〡〺〲〼。〩〿〯』〟〴〼〦〤〙〢〩〔〲〆〗〲《〟〤〬〷〧〫〧〗〞〣〚〚〧〭〮〛〲〮々〩〩〕〬々 〥〸、〢。〿〵〺〤〲〝 〥》々〰」〮〩〛〛』々『〹〞〃〃々〚【〱。。〹〨〿〻〣〞〨〈〤〼〃〻〩〶『 〲〷〗〭〓〯〯〝〃〾〕〻〖〱《「〹〣〦 」〵〄〮〚〖〞〪〼〖〙〵》〰〃〘。【〺〖〄〪〝〭〆〬〚〬〨〽」〕」「〜〤〯〷〇〝〠〆〫〼」〭〤〓〔々〆〵〷〪〭「〆〖〇〽〄〄「〿〵〷〤〿〮〫〻〢〕〝〪〳〸〘〡〡〞〮〻】「〝〷〘〾〒〺〉〨〰』〳〓〃〒〪【〗〯「〧々〷〩〝』〭〇〒、〯〈〦〣〆〬〸〚〈〉〔〥《々》〹〢〺〤〝々《 〲〘】〚』〚『〯〼〾〱〵〻、〪〟〸〯〽〴〱。〵〪〫々〳〢〣〕〓〩「〘〜〨〻】〿〹〭〛〛〔〹〻。〛〴〤〢〮、〸〷〃〜〜〝〔』〳〮〹』〽 〶〛 〤」〢。〣〖〶〯〥》〢〸〸〤〕〣〘》〧〦」〘〻〶〾〮〢〳〝〙〻〦〺〇〲〢〔〘〶〩〖】〟〓〰〇〮』〦〄々〹〻〄〄〽〷〱〫〒〛〉〿〓〯〺〪〲〢〼、〫〬「〩《〡〕〻』〭〜〗〫】 、〈〙〉【〓〣〫〜〈『〾】〴〪〫〬〶〪〚〬〿〪〮〴〒〶〡〄〉〿〼〜〵』〻〼〢「〵。』〸〖〙〧』〾〖〙《〉〪〦〙〔〈 〤〫〦〸『〗「〣『〓『〡〨〖〥〭《〢〠〦〞〸〞〚〢〕〙〖〾、〩」「〗〈〰〸〤〴〶〤〙《々〆〽〆【『〬〝〸『〙、〪〻』〓〹々〥〲〉〪〹〫〓〽〪〩〷、〹〺〩「〞『】〡々〡「〇〉〺〶〾〔々、〾〻〪〣〖〡〩〥〾〯】〤〰》『〲【〙〭〽〛〿々〟 〢〃〼〕〫〲』〪【〛〯】〔〕〥』」〳』《〖〥〳〄〢【〩〮〫〥〝〯〿〟、〣〹〪〔〱」〖〢〘〛〾〾〜〒〝〷〚〳〣〝〟《】「》〻『〢〄〄 》〱〓〞〛〢〆〺〉《〃〭〙〻〞〷〩〹〥〦〫〞〄〇〯〽〱〼〴〾〕〸〿〱〪〨〟〠々〪〸〔〵〆」〔〖〴〝〟】《〥 』《〒〄〣〿〞》】〃〹〲〛〬。】〒〓〹〴〿〥〴〲〖〧〝〪〶「〕〔〞〜〸〬〒〽【〸〻〢【〱」〪〉 〉〘〪〻〴〞』〯〰〾〥〓〼〻〕』〠〃〟〩〛〔【〻〡「〘〔 〲々〻〚〈〪〱〾〷〗》〯〞【〩 『〕〪〈々〞〞〳〘〵〃〼〨々〇〞〈〹〧〢〃〢〮〆〈〤〘〬〟〽〩、。〲々〺〠〳〸 〸〹〥、〯〒〈〃〠〰〙〪〯〬〖」〔〹〔〘〶〾〨〿〛〈〡〯〕〶〲、〷【》〷〆》〄《】〒〓〔〼〉〒〢〄〢〓〩〰〃〔。〵〙』。〷〼〩」〒〒〇〳〆〘〯「〢〠】〱〱《〤〽〢〄〤〵〪」〆〘〲〪〼〷〕〚〙〢〳〲〦〥〃〩〳〤「〽〽〇〖〶〶〾〴〰〷〨『〟〲〬〵〲〸〩〕〣〫】〝〇〡〿〳〦【〧〖〓〫〿〣〖【〙【〵々〶『〵〟〠〇》』〲〹〾〰〰〙〚〖〳〞〄『〤〠〇、。〆〧 〒〘〱〾〢〲〵〇〼〼〪〤〵〓〴〦〵〛『〘』〭〔〯「〓」〤〼〱〒〤〶〰〖〬〻【〳〵〡〃〙〠〩〛〝〰》〸《』〦 〿〭〵〺〈〓〵〛【〴〤〒。〪〷〢〡』〒 〄〚々〽〄〔〖々。〪〠〢〸〮〵〾「〉〙〆〘〣《〩〽〃〄「〕〢〻〉〷〛〫〇〪〯〵《〷〚〕〇〟〔〛『〣〆』〸〶々〳〾《〭〯〫〄〔〗〨〺〛〴》〻〫〨〢〜〱〇〦〘〺〉〫〇〧〿〶〲〉〖〵〦〹〷〳〈〞』』〡〓〺〟〡〭、〧〺〺〱〟」「〠〡「〠〬〰〙〹〥〙〓〶〫〳〣〢〳〇〫』々〡〚「〮〘〭〹〶〸〮【〔〚〆〆〼〷〖〒〤〲〕〳〴〾〇〔〹「〦〔〹々〘〲〔〃〡〪〚〪〗〉〓〫〦 」〟〳〛〉〹〺〭〲〆〙〽「〱〘〿〡〭〦】、〠〰〢〥《〶《』〶〃〼〄〪〥〙【。』 〸〳〈〇〡〩〮〃〹〘〧〿〱々〿〭》〶】〥〜、〬〖〠〢。〾〫〔〩〥〫〓」〲〢〛〶〚〡〈」〡〦〼〰〔〾〨〔〄〹〬〛〃〇〸。〽〠〵〙〠【〶〉〇〗〔〒〒〇〉〧《〗〮〟〡《〉〻〧〝〓〱〧〜〘〦【〸〘〩、〵〡〈〴〭『〉〕〴〯〰〘〳。〴〃〙〨〄〈〿〒〕〯」〼〳〤〱『〓〚〛〳〣〳〺〒、〃〚〲〲』〳〃〷〵〹〷〾〞〞〹〣〢〨〵》〽〮〒〹〻〨〜」〇〗〨〙〒〃〆〫〹〉〻。〄〔〧〝〒〷〛〲〧〪〺〚〼〳〒〙〫〢「〲】〾〬〸〷〿〉 〱〛〙〰〜〧》〳〉】】〮〈〗〢〧〟〠〣 〭〵〰「〼〽〭〫〘〴〲〺〾〘「〮〯〩〛〤〣〥〛】〱。〬〴〞〰〣〻〵〹〤〇〴〮〦》『〨〛『〡〞〥〄〠〸〽、』〣〬〢〠〯〰〄〇〆》〇〵『〹〛、〃〟〙〡〷〿〩〥〶〲、〓〧〲〪〚〕〞〢〗〖〝〰〵〪〴〿』〱〮〳〫】《〹〟〻〝〓〦〣〞〤〷〠 〃〈〛「〱〿〆〟〟〉〤〿〈〦〥〻《〻〼〇〢〰〢〒【〞〆「〢〻〧〇。〭々【〪『〪〓】〹〃〄〹〕〝〒〚》〔。〕〶〺《』〦〗〳〰〶〨〔々〖《〰〷〛〩〨』〤〻』《。〵〱〼〵〛〝〧〼〡〶〧〾〯〷〞 〧〛。〦〛〪〕〶〱〆〤〻〹〱〰〖〨〥〚々。〾〽〦〸】〛〇〫》〃々々〲《『『〱〘〲〕〦〇〱〈〞」】〞〨〖〚〽〧〥〬〰〬〥〇〡〼〴〲〠〭〖〵〯。〙〪〖〯〄〾〮〗『〉〴〩 〃〚〲〠〨〟』〖〜〥〛〉〲〃〃〮〳〡〳〩 〄々〞〨〛〪「〼〓〭 々〵〘〄〝〭〖〰〾〬〆〸。〻〓〞『〥〗〪〚〇〞〭〤〉〼〬〕【〤】〥〡〛〖〕〆〧〝〧〺《〭〈〸〪〆〺〸〝〭〇、〆〯〴〸〤、〾〒〉〰〛〷〽〶〿〰〫〜〔〪〱〇』』〰〨〞〓〽〻〻〙〪〠〨〗〓〣〨〾。〜〃〘〚〇〟〖〗【〥。〡「〾『〙〢〦〹〩〟〠〘】〾〒〈〔『〣〲〉〉〻『〇〦〽〿〼〾〚〮〧。〷〰〲〧《〹」〕々〻〤〗〦』《〳〢、」〤 〰〞〠〨〾〪〯〮〳〒 〰〜〼〕〰〳〄》〤「〗〽〇〠〔〝〚〽〣》〷〙】〶〷〆』〇》〓〄〤〸【〡】〾『〯〶、〵〨》〼〗〨〶〉〄〭〓〲〞〝〞〡〻〷〻〣〰〈〽〮》〲《。〸〶〿〣〞。】〡」〖〩〔〜〘》〤〦」〓『〨 〹〞『〛〡〧〬〃〷】〔〫〆〤〻〲〆〯〞〿〧〔『 。〓〳〝〢〿〮〯〵〮〨》〴〒」〒〷〻〶〡〽〤〭〽〰》〾〹。〳〔〹》〴〕〫」〹〜〻〦〳〕〺〘〴』〈〽〲〃〔〙。【」〇〨》〨〴〿〄〻」〉》「〚〺〿〹〤』〄〸】〴〩々【「〫〒】〄〛 【〰〯〶〰〉【〮」〦「〣》〴〙〿〽〄〔〈〓〻〠」〚〯 〷〄〆〳《〸 〴〕〩〸〾〡〼〻〆〬〶〞〓〤〩〿〪〻、〠「〲〓〠〦〛〢〓〇〸〡〬〱】〞〫〽〖〉、〻〿〈〸〓〹〯〰〸〰〘〫 〬〬〽〦〣〾々〥《〰〗〩〰〞】〪〆〷〳〚《〯〱〓〣〭〗。〬「〢〸〮〤〓〖〾〣』〘〳〕【〼〤〔」〵〰〪〡〲。〤〃』〧〙【。〝「〶〻〝〖〢〡〿〓〖〺〝〈々】〈、『〼〣》〔〪《〢〣、〛〕〙〞〭〿〧〵」〴〾〯〫「〨〕〨〄〷』〵《〶〼〘〗】々〖 〳〶「《〝〰々〢〙〈〣〶〟〓〱〬〇〷〦〿』【〕〪〶〺〽〄〡〷〽〲》〟〃」〵〤〞〤〠〜〵〽》〉〡〦〖】〉〓〥〤〞 〺《〖〗 、」〯〳「〾【〩〮。〝〮〙】〦〴『」〘〕〉〚〯〳〇。〾。〇〔』』〚》〃、〠【〝〮」〟《〆〮〇」〥。〟〦〿〠〟〰〺〳々〯】〨〸〼〳〭〶〷〮〨〳〘〤〦。〠『〸〖「〰〝〡〻〻、〇」〇〚』〧」》〮〲〫】〱〼〻〲〷〓〉〵〩〢〣〻〚〞〧〰〽〕〭〧々〠〹〃〟〄〰〚〽〣〚〥〺〛〟〄〮〟〴『〾〒《〺〡 
〒〜〈〶〔〫〲〃〟」〿〘〥〥〥〓『〝。〧〾〓〶〺〆〷〩〣〫〜〿〿〰「〕〒〓〯〣〘〗【【〪〾〛〕〽〫〹【〿〧〛〵〲〛〒〇〉〧〺」〺〺〡『〳、〪〾〒〈〮〜〞〙〱【《〣〬〈」〣〵〹〥〵〞〻〆〭〵〟〒〲〧〓〖〣〓々〰〞〹〇〮】〪〫〶「〦〽〓〻〓】〽〭「〣〔〹〯〨〖〩〵〦〳〯〯〧。〗』〾『〩〗〴」〼〗〨〵〥〴、。〒〣〧【《〓〜〓〠〢〓】〷〺〼〕〡〆、〦〿〥〾〚】〕〦〖〙 〭〬〙〇〳〄〃〄〻〧〔〚〰〲〟〷『〫 】〲〲〸〳《〢〵〰〟〪〉〜〨〇〶〻〻〩〄』〒〴〨〈』〗〿〚『〝 〹々〳〼〲〗〙「〵〲〢〔〫〵〜 〘〶【〬『〱〗、〧『〛〇〛〒〈 、〦】〙〇〖〤〩〜〉」〉〿〬〧【〶〦〃〘〈〖〄〶〦〚〜】〛〽〡〸〰々〈「〾〼〒〥〞〸」〮〸〒〗〙々『〇〄〈〃〜〺〯〉〉〾〹〺〚〞〽〦〄〢〽〄〞〻 〼〄〘〙】〚〼〫〴〚〫〬〖〭〔。〰〹〶〺〕〨〇〛 」。〇〿〲「。〆〗、《〫〬〨〻〝】〓〥〾〴】〹〈〞〺〜〰〜〬〴〱〜〖〾〣〭〥 〯〩〶〈》〸〝〼》〶〆〆〽〼「〗〓『〕〃】〡〠〹〺〈【〸〝〤〮〸〭〩〼〈〃〃〉】〳〿〃〬《 〩〈〒〢〠〆》〇〭〬〓〖〝】〧〶〞〈〶〘】」〽〝《〡 〈〟〶〯〹〦〨〷〩〧〞《〵〬〰々〞〧〓〥》」》〤〥〧〧〓〛。〦〄〫】〪〔〟〟〷〧〷〟〺〪〩〷〡〘〞「〔〽〯〔〬〈、〴〨》〥〒々〼〒"; - final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -185,7 +185,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void testYetAnotherCuriousString() throws Exception { Random random = random(); final String s = "〦〧〷《〓〄〽〣》〉々〾〈〢』『〛【〽〕〗〝〓〭〷〷〉〨〸〇〾〨〺〗〇〉〲〪〔〃〫〾〫〻〞〪〵〣【〩〱〭〨〸〃々〹〫〻〥〖〘〲〺〓》〻〷〽〺〯〫』〩〒 〇〔】〳 〵〮〇〡「》〭〆〒〜〱〒〮〺〙〼」〤〤〒〓〶〫〟〳〃〺〫〺〺〤〩〲〬 〱〜〝〤〘〻〚〻〹〒〃」〉〔「〺〆々〗〲〔〞〲〴〡〃〿〫」〪〤」「〿〚』〕〆』〭『〥〕〷〰〝〨〺〧【『〘〧〪』〫〝〧〪〨〺〣〗〺〮〽 〪〢】「〼〮〨〝〹〝〹〩〳〞〮【」〰、〳〤〩〄〶〞〠〗〗〙〽々 〟〴〭、《〃〝〈〒〸〷〓〉〉〳」〘」》〮〠〃〓〻〶〟〛〞〮 〇〨〭〹』〨〵〪〡〔〃〤〔〇〲〨〳〖〧〸 〴】〯〬」〛〨〖〟》〺〨〫〲〄〕」〵〦〢〴〰〨〺〃〓【》、〨〯〥〪〪〭〺〉〟〙〚〰〦〉〥々〇】〼〗〩》。〩〓〤〄〛〇〨〞〣〦〿々》〩『〕〡 〧〕〫〨〹。〺〿《〪〭〫〴〟〥〘〞〜〩。〮〄《〹〧〖〿》〰〵〉〯。〨〢〨〗〪〫〸〦〴〒〧〮」〱〕〞〓〲〭〈〩『〹〣〞〵〳〵》〭〷「〇〓〫〲〪『『》〧〇〚〴〤〗〯〰〜〉〒〚〔〠〽、〾〻〷〶》〆〮〉』〦〈〣〄、〟〇〜〱〮〚〕》〕〟〸〜〃〪〲〵〮〫〿〙〣〈 〳〾〟〠〳〙。〮〰〴〈』「〿《〄〛〩〪》「〓〇〶〩〇、〉〦〥〢》〴〷》〦』〉〟〲〚〹〴〲》〣〵〧〡〾〦〡〣「〆々 〔〄〓〡〬〹〣〰。〵〭〛〲〧〜〽〛〺』〛〵〒〽〻〆〚〚〟〵〲〺〠〼〻〄。〯〉〃』〕〫〥〦〕〔〢々〷々〥〥〖』〶〿〘〗」〖『〢〯〫〇〣〒〖〬〜〝〩〉〾〮〈〩、〘〰〦〧〓〬〸〓〺〼〟〰々〩〩〹〣」〓〸〄『〆〰〹》〵〉】】〼』』〸〣〦〾〰〗〴〥〴〤〃〿〡〳」〢〩〡〮〻〘〤〝〗〃〪〘〈〴〪〯「〭〓々〃〯〄〼〚〧々〢〃〈〔。】〆〣〰〜〪〮〣〿〕〮〾〱〇〈〟〭】〔〥〡〝〙〛〔「 〼〶〸々〹〯『〞〒〇〟〃〳〓〩〝〿《〵】〙〛〪 〭〼〈。〷》〨〰〵」〤〄〾〄「〈『〥〽〕〙【〤》〳〝〔〠〤〲〘〱〈『〴〫〚「」〛〸〹】〱〒〆」。〯〃】〼〮〒〄》〾〷〥〟〞〲〜〲〟〫〕〆〇〸〸〹〾〰【》〨〤〭「〇】〳〯〤、〙〳〺『〲〽〬〥〠。〹〃」〹〪〭〒 〇〶〧〟〻【」】〙〤〡〱〖》〇々〽〬〥〨〠〘〺〳【〫〄〜〹〄〚〯〈〸〻〓〥〤〻〮〃〗々〪〺〿〬〙〈『〭〩〟〽〬〝〄〦〇〥【〨〫〦〗〯〞〜〈〒〽〖〧〼〈〭〓〶〃〰〙「〧〉〹〢〕〼〒〸〼〣〡〔〩〯〼〚〲〖〪〯〒〮】〥〙〯〆〡〲〾〭〫〕〘、〖〮】〟〺〝〨〤〯〓〛》〳〢「〒〥『〿〔〸。〫〬〡〓〝「々」。〘〣〲〴〆〲】〽〮〮〲〓〞。〲〘〉【〲〭〰〨〩〱「〆〩。〦〉〇〄〺〱」〮〄〯。《〭〹〳〸〜〮〧〷〜〹〥〾〨〬〦〮』〖】〖〥〞〕〧〹〽、〺〜〯〒《々〠〠〴〝〤〇〷『〳〞〠〤〣。】〝。〛〉《〩、〦〻々〄〙〞〽〒〧】〉〺〦〔〄〯〙 〫〴〈〽〴《〰〱〗〢〓〔〗〖〖〪〷〠。〨〠〙〴〷〿〻〴〪〠 》〉「〛」〟〗「「〚〤「〫〨〣〉〶〥〢〈〯〄〈】〃〵〪〼〸「〾〥〒〲〮】〙》〡〯〓〵〡《〬〾〛】〄〡〦〪、〆、〵〒〹〰〴〜〬〶〭〕〟〠〰〜〶〵〨〾《 〻〵〔〘〟〾「〡〃〼。〤〺〭〨。〪》〄〇〄〔〖〺〪』〆〸『〰〭〆〗〪〪 〇〜〡〨〞〧〇〛〥」〼〇〼『〸『〵〼〇 〽〹〨〪〗〳〽』〵〽〸〷〄〿 〩〢〺〳〗〞〹〒〼〕。〇〷〔〯〜〘〾」」。〥〯〤〖〛〙〹〘〯〡〱〮》〰〾〚〚〣〆〰〹〾〝〉〲〠〗】〤〿〶〱〾〇〽〤〰〆〭〝】〤〰〼〪〬〰〸〓、〃〵〄〉〤〲〱〨〵〴〮〹〬〧〜〭〶〒〯〺〬〒〭〲〡〔〚〹〇〫【〯〥〪〻々々〨〧〳〛〯〿 〈〽〥〘〖〣〿〫〲〶〚 〓〙〫〴〆〙〶〽〉、〔〪〫】〤〟〓〃〝、〧〡〸〸。〸【〹 〧〡】〡「〗〴〴〳〶『〱〖「〺〠〼〾〱〃〖〤「〧〭〟〇〧〙〕〩〭〻〤〩〪〳〪〟々。〷〥〗。〳〸〆〢「〆〿〻〚〳〚〸〟〘〡〘〇〶〖〡〇〾〥〖〝〝〹』〦。〖「》〥〞〳〛〕〖〥〻〙〾〔〬〈〇〓」〭〹〷〪〖《〫〾〒〙〺〻〨〼〇〝〾〣〴〚〩〴〕〢〦〩』〭〧〵〾〟〣〬〥〟〣〜」、《〲〧〪〸〸【〙〹、〤〽〰。〦〩〮〹】〸〆〹〗〓〶〇〤〳】〾〨〞〩〱〡〇〱〮。〶》〝〱〗〃〘〣〬〲〽〈〒〻〃〥〪〭〤〗〰『〵〹〙〇〵【〕『〤〄〕〥〵〸〮。〳〮、〤〣〱〧』〯〜』〉】『〷 〰〵〓〙〃〟〆〼〞「〫〄」』 〨〹〸《〷〔〫《〝〞〆〬〩〟」】〾〷〄》 〵 〫〵》〻〨〰 〟〈〰〽「」〸〣〪〮〛〞〜〦〱〚』〕〱〪〲〩〥「〚〓〺〣〶〨』〕〇〮〹〟〞〕〶〡〭〠〕〦〦〢〽〤〈〈〻〣〧〱〿〵】〖〞〖【〢〩〼【 〻〘〃〤〫。〠、〗〢〷」】〼〘〖。〤〘〄〢〴、〘〆〯〱〜〃「〦『〯〰〘〫〹〶〷〿、】々〙〛〜「〹々〮〿「〸〉』〯〱〄〓〥〣〩〥』〖〤〛【〭〿〺「「〳〛〧〉『〈〆〒〠》〳〈〳〩〃〮〚〼」〲〮〩〮〮〢〸〿》〈〉〗〾〇〕〩〸〖〾〠》〃〞〄〣〭〡〕〣〚〆〤〄。〸〞。《〼〄〤〸」〿》〤「〵〥【〔〕々〙〸〛〛『〶〾。〷〫〼〽〤〨〓〭〻〈〶〿〾〨』〤【〾」〇〤〒〠〺〜〸〼〪〢〷〔》〣〤〬〣〱〝〇〺〢〠〤〹〡「〪〲〿〬〘〡〯、】〖、〈〶〛〢〕々〽〼〼〚〿〘】〢〰〡〿〗《〉〙《《『〶【、、】〡〓〦〞〵〤〧』〝〕 〄〃〸〈〤〪〻〭〉〘〷〉〕〨〻〢〢〡〸〔〮〧〹「〦〘〉〾〉 〺〽〷「〺〖〺〝》〃 〇〪〜〶〺〣〇〭 〾」〣〼〞〷々〽〤〶々》〻〈〽〒〕『〬」〈〟〕〷〼〲〄〚〜〴。〮》々〧〻〔〕〈「〾『。〴〷〯〢〿〦〈〸〩〻〃〻〚〞〤〈。〧〇〾〺〢〓〵〸〛〔〡〷【〜〺 〕〶〦〣〻〟」》】〺〚〷〺〹〙〳〺〬〓〢』〘〕「〸、〙〾》〖》々〬〄〇『。〵【〩『〺〆〮〮〙〵〫《〃〽、〓〠〨〚〕〈『〦【〗 〄〴〫〡〮〱〔〆〗〟〵】〻々》〲【〬〢〚〛』〱〰〫 〇〤〴〮〾。。〮】〇〲〻〙〰〥〚」〟〜〄〟。〤々〞》〧〉〳【〿〺〆〈〖》『〤〄』〾〵〲〸。〈『〕〺〘〣〶〬『〪〆〳〽《。〒「〽〨〸〜〚〘〪〤々〦〆〺『〣〆〽〇〿「〥〵〒〲〟〜〳〭〼〆〡〮〆「〆〥〺》〱『〺〔〃〙〻〥々《々〙〼〪〼〵〙』〥「 〵〯〓〩。〰〕《〟〦〝、〦〦〤〗〴〩〹〶〠〰〡〇〤〹〓」〣〆〜〴〘〔〃「〤〈〩 〠【〃〙〢々〉〝〬〙〭【〮〗〙〤〿〖〓〫〻』〞〤〼〳〹〄〵〾〔〛〮。〒〉〤〣〭〰〨》〭〲〗〃〇〆〡〜〱〲〮〫〄〬〄〉〯〈〮〩【 〮〦《〪〲〣〡〶〬『〲〵〇〶〰〒〭〽 〰〄〻〄『〬〩〠』〕〫〤 〼〶〳〮、〓〸〲〓〜〳〺〈〫〺〒 〨〡〡【〷〆〇』〝〩〨〗〕〪】〪 〛〛〺〙〷〦〠」〱〞〼〸」、〢 
〺」』〲〆〃〟〱〟〝「《〸〳〒〖〨】〥〖〈〧〼。〫『〙〧〡『】〔々、〼〝 〕〙〇〘〲〔〝〺〘〄〓〒〼〈〛『〺々〩〱。』〬【〱「〳〜〼〬〴』《〗〔〡〰〪〤〥〲《』〥〉〪【〶〤【〻〡〒〯〜【〽〪〉〠〾〙〰〚〵〦〦〴々》〙〠」》〠〱〓【〶〦々〻【〽〶〼〺〷。〶【〘》〻〗〳〣、」】〳〓〞〆〆〾』「〈〙〕〱〢〳〨〰〡〸。〣〪〤「〱「『〙〽〇々【〜〖〮〚〟」。〜〰〉〔。〣〽〇〖〬〆〥〖〧〨〱〡〸〪〣々』〄「『〞〶 〴〰】〃〱〱「〶〝】〞〭〚〴〶〻〟〧〡〳〬〧、〣】〕〼「〠〃〷〣〩 〭〄〩〝〦』〟〇〦〟〕。〩』「〵〩》〿〻『〙〼〲〰》〨〉〆〓、〺〹〸。〞〧〗〘〳〓〞〹〕〡〼〔〖〴〄】〚〻〯〴〣〮〦〧〣〵〼〚〾〫〼〣〔〚〽『〵〒【【〝〹〮》『〨〜〠〸〠〵〨〙【〧〸〈》〱〗【〓〤】〰】】、〩〽〈〸〔「〵〻〙〓〰〇〚〞〗〙〢々〭〜〈 。〧〿〧〨〵〾〝〬【』〫〦〸〬〈、〒〢〉〞〵〒〼〝》〻〫〧〤〶〹〼〩〛〫〣】〿」〴》〺〬〤〕〲〕〙〔〪〰〿〬〒〔〞〆〻〴〘〩〨〤》〩〪〭〳〇〣〚〟〚〕〓〴〱 〵〃〠〭〠〚〗〃〃〸〰〢〡〿〭『〗〉、〲〕〧「〛〛〓〜〰〮』〱〨〬〨〽〸〽〶〣〯〫、〯々、〴 」〕〥』〻】〖〴」〨。〖〤『〜〰〩〣〣〸、〫〝、〯〹〷〳〚〄〷【〃」〼「〤】 〢〖 〣〙〺〽〽〱〤〔〓々〣〭〽〘〦〻〪〿〞〝〱、《〆」〸〷〛〓〕〹〜〪〹〶、〵〦〛〲〒〹〪〦〃〥「〸〪〙〧〱〠〰〝〆〠〯《〼 〛〚〔〟〽〗」、〲〥〞〴〃、「『〖〼〞〪〼〇』〿〶々〙〻》〥、〵〛〞〠〫〟  〹〾〵』〤〿〣〪〗〃〖〬〩〴〗々〓〝〥〥〜〲〯〗〤》〛〮》々〚〘〫「〙〉【〆〽〨〹〮〧〷「〴〝〬〷〗『〔〷〮〟〲〬〸〸〟〹〆〖〨。〣〄』〴〚】〘〲〚〚〦〈〛〗〞〉〞〯〆〵〸 〗〕、》【〸〮〵〉〥〨〕〟〭【〾〇〵〬〾」〱〹〚〟〛〡」〩〃〄〬〱〭〚〱〆〛》〣』〝〡〦〣〫〒〗〛〿〤〇〼〠〲〢〬〿〓〠」〚〇〛〈〴《〦〱〤〹〝〱〶〟〙〴〶〣〝〮 〜〲〱〿〳〪〄〝〃〰〙〖〼〰、〬〰』「〭〻〮 〩』〱【〆〻〺〸〾〤〗〸〥〽〼】〤〣〖「《〡〙。〸、」』〠》〴〈、〴〢〣〲〟〳〸〒〠〣〵〢〿》』〿『〾〔〢〶〦〟〠《〹「〷〽〷〆〇〉〲〿〵〙』〫〠々々〘』《〽〒〦〽〓〳、〮〻〫〞〲〰】【〗」々〥』【〫〆〫〳〾〣〖〺〷〙〘〄〈〼、〧〻〭〮〳』〘〾〇〸〉〽〗『〙〽〻〟〇〘〽〖〴〄〓〞「〦〪〚〾〨。〕〻〰〟〉〢「〉〿〯〔〹〃〛〛〝〔《〵「「〴「〗〸〖〞〦【々〣〲〤〾〿〽〲〥〢〥』〳〳〼『】〆〼》〩》」。〛〲〡〳「〢〥》〘〠〃〳〃〒 〧〓〡〤〄〲」〦〶〷〟〛〠〱〽〫〫〸〇〔、〪〛〠 」〢〳〸『〸〚〹〈〘〉〫〇〲〲〈〕〙〱】〯々【〬〖〿〒】〔〭〣〚〄〈』〧〗〹】〇〬〸〾 〭〺』〯〫〻『〘〻〱 〴〆〘「〠〈〫〡》〤〕】〜〙〵〒〙。〦〮〞〪』〴〓〪〾〝〹〴〼《〦〞〖〆《〥〸〻〈〽〪〤【〖〶〞〤〃〰〨〱』〨〼〱〠〣」〝〹〝〕〼〔〃、〮】〤【〼〤〼〥〪〲〓〦〘〟〞〭〜〸】〚〸〵〞〙〧〈〽〹〄『『〙〓〸〯。〜〺。「〖 〶々〉〈〮《〢〭〶】〘〜〺〸〒〥〢〾〈』〱〃〤〳〖〉〼〫〛〚〽〫〳〰〫〥〜〜〺〷〲《〢『〛〭〈〧〳〣〜〝〧、〥〾〻〳〺〕〥〥〼》」〺〮〒〣〥〲〟〠〫」〾〱〼「〄〆「〓〽〹〵〈〙〛〵〰〩〟〫〈〔々〒〟々〉、〷〚〶〆〘〛。 【「〸〸〖〫〕〰〱〺〟〫〿〹〩〇。〾〒〚〲〾〛〳〨〦〙〒》。〺〧〡〞〒〚〩〪〶〘〣〨〶〩〛〺〙〪〄〼〮〰〒〡〼〓〙〒〇〽『〃 』〇》〽〃《〒〠「〚〨〗〶〴〪〮〵〘〨々〓〗〚〠』〗〮〳〺〲〙〒「〴〼〻〤〉〯〨〧〈】〾〟〝〒〃〘〧『〶〿、〤〝】〜〴〰〷〽〮〱、〩〽〺〯〫〜【〴〈〳〖〬】〦〘〗〜〝〄〚〚〤〨〲』【〞「〰〔》〷〥〈〡〳〢〾〮《〭〫〡〴〹〻〚「〰〻〉〣〢〤〤〝〩〧〙《〓】〺〺〓〿〹〈〚〱〬〘《〽〈〕》〣〓〒〴〆〜〭〖〛〝〷〧〴〮》〳〘〸〴〿〥〙〒〔「》〓〕〦〯〾〯〝、〮、〯〆〛』〞〝〵〥〬〚〡〰〔〵】。〽〥〿』〩〇〝〄〴〪〭〸〫〡〣〧〆〚〫〴〙〦〽〉〸〼。〱〨〛〠。〮』〝》〻〹〈〄《〻〱〥〞〽〾〄〝〢〿。〴〆〲『〰〢〖〲〼〯〃〠【〲〵〛〣〝〕〬〺〰〪〻『〨】〖〥〵〹〯 〒「〠〮〈〃〹〽〬』〹〷〫〕〧〟〒〉〉、〈「〟》〼〪〰〗〘『〞〉〹〚〤〩〦〗〖〮〰〇〠〫」〔》〮、〆〡〛〻〙「〵。〯〹〘「〵〫〼。】〃〢〺〴〛〪〬 〞〟〓」〭】、〸〘〻〈〤》〓〩〽〆〵〨〈「〦〠々〨〒〢〛〝〿〗〥〱〕〩〖〣〄〚〿〆〗〢〉々《〚〩〶》〥 【『〪〯〾〸〪〲〞〠 〡〓〻〷〢〕』「〹〯〛〫〲〗〗〚」〵【〪〢〥〫〆》〦〥〱〯【【〉〧〺〻〉〬〳〒〳〾〲〲〇〇。〪〙〧〿〆【」〇〪〸〽〦〚〽〿 〠〺〥〦々〬〄〟〪〭、】〴〾〸〛。 『、《〫〺〯〛〩》〓〴〪》」々〧【〦〇〮〬〲〗〔〦〴〣〼〨〖〩〬〼々〛〇」〴〦〉〤〺〪《 〒〧々〤〧〣〘【〵〛〢〵《〛〘〵〓〶〳〤〺〨〣〭〤〪〮〺〷《〗〵〞〻〠〭〃】〄〒〯々〶〉〞々〽〤〇〦『〦〽〩〬〠』〷〄〩〙〖〝『〘『々〔【〿〰〶〪〱〉〘》〃〙〧〦〇「》《〹〰〯】〹〄〈〪〜〵、〮〣〇〯〲〛〬〕】々〸〹〩〟〳〆〥〯〬〠〭〯『〙〆〾『「〈〬〹〕〾、〸」〷〥〆〺〾〖。〆〒〮〻〡。〉々〕』『〨〼 〢〓『〢」々『 〘、〖〤〜【々〤〷〵〳〤〽」〟〥〴』、〒〥〆〙〬〧〔〡〄》〷。〣〉〪〙〚〾〣〵〰〮〔〇〝〫〫〩。〪〷〩。〇〿】〲〦〳〕《〄〴〦〽〔、〱〧〟。〻〺〔〝【〲〔〦〙〖》〠〫】〵〙〰。〖〸〼〣〗〲々〤〢〷〝〰】〳〳〯〟〓〬〺〤〿〲〩〞〡〧〲〧〭〽〪〰〥〧〴〈〈〢〕〯〔〨々〭〸〡〖〓〤〒〝〻〻』〣々〸【〸〸〷〓〇〦〻〤』〉〾〛「〢〢《】〜々〛〇〠〒〹〖〽〮〚〫〜〼〄〓 〹〽《〽》〮【〺〦〠〨〰〸〘〲』」〹〳〤〽〴〴〰〳〷〟】〼〽〓〇、〡〚〶 〥〄〉〴〵、〷〳〥〬〳〓〩〯〜〪〯〬々〢〾〆〨〥」』〪〄〨〽〗〭〯〼〒〡「々〩』 〉〔〓《〉〺〫〖〽〱〳〡〪〯』〼〉〝〟〹〯〇〠〥〨〖「〢「〥〲〘『〹〥〶〜〥『〃。〲〗〢〩〮〕〨〸』〪〯〲】〠〻〟〶〣〸〵〩〔〾〞〳〾〇〵〥〟〭〳〡〆〾〤〶】〈〓〄〮〢〒〩《〔〭〄》『〰〧〡〖〵〥〵〒〭〳〵〝〜〱々〞〰〴〦〱〿〾〴〪〥〧〚〚〒〚〘〿〛〾〫〚〕〷〔〗〢〻〠』〘〾〖〿〦〥〮〆〼〞〴〹〸〻〵〞〄々〷〔《】〛〒〻〓〴〮〛〺》〫〬々〦〦〬」〯〞〼〚〘〰〿〝〾〘〠〵〴〃〞、〹〢〗〹〰〤「〔』〇〒〭〫 〞〉〿〜〳〫〩〿〧〵〟〾〤々〩〝「《〬〃〇〬】〔〇〆〷〭〬〵〾〚〺〬〧〻『」〈」〻〹〞、】。〉〯〫〺〒〙」〱〛〻「』〱〺〠〄【〿〦〰〸『〬〴〓〨〢《〣〓〜〒〡『〼〔『〵〕〝〗〳《〲〳〼〝「〽〬〱〺〠〱〽〘〗〹〨〆〕〠々〓〤】〺〉〴〰〮」〰〿〹〳『〠〔〇〧〭〼〪〭〯〖〶〬〃〱〔〙》〺〜〵々】〡〧〲』〕〛〳〥〩〱〮《〦〫】〖〈》〞〻〤〢〦〪〬〲〗〢〷  〳〰〓〕〜〥」〬〗〒〜〉〩〆〬々〿〪『〣〘〡〘〯〳【〄〠〸〼〈〰『。〟〲〭〡〷〥〯〴「。〤〓〪〆〦〆〒〽〫〰〚〡〨【〯〹「〧」〓〖〘〳」〕〲〚〣〕〆〃〱〞〷〺〻〃』〩〫〦〱〴〟、〰〘〞《」〛〤〿〔 〤〱》〗〷〡〡〗〞〦〿。〤〳】。〟〻〉「〻〙〖〿〄〶」〾〫〽〸〕〢〰〞〞〒〜〻〠〭〫 〞〴〰〶〺《〣々〩〲〡〴》、〩〝〞【〼〓〱〻〩〒〖〿〮〱〧〟〒〶、〿〈「〻〴』』〇〉〝〛〢〜〼〘〰〇〢〃〲〟〨〟〣〟〰〉〮〘〽〧。〓〳〩〺〳〓〘〗〖〈〜〴〟〽〣〣〾〽〩〲〜〇〰〩〕〧〚〄〴〴〴〨〠〦】、〣〺〖》〯〷』〒〤』〙〗〬。〧〆〜 〧〩〯〞〜〬〡〆、〞〔。〾〩〈〛〼」〾〮〤〾〟』〉〔〞〾〛〲《〈〫〝〽〳〞〔【〿〽〩。〈〨「〯《々〇、〯〜〾〝〯〼〆〟〉〝〮〙〪〚〮〱〹〯〜〟〠、〄〹〧〳〱〯〖〯】〩〴【〫〇「「〿〩〷〾〴〯〦〼〦〟〖〤〪〥〰〔〻〪〄〖〳〵〟〕〰〬〶〚『〘〻〇〽〪「〉】〮「〣〿〇〭〕〓〵〽〆〳 〨〩〕〬〵〸〻〲【『〥〖〚〢〰』〠。、〮〣〆〴『《〲〓〷《〱〰々〫〶〢〯〗〚〙〶〫〖〃〻》〰『〱〘〫〛〄〉「〠〱〚〖〕》〤《 〵〶〢〯〗〳〛〚〽〗〟〛〪〾〶〞〶々〆〯〇〝〕〨〨〣〫〄〵〞〛〬〣《〦〦〒〉〙〫》〞〨〜『〝〻〒〟〓〜》〡〡〫〻』〆〒 〔。〓》《〨〙〿〙〔〘〮〦「〚〻、「〵〠〉〬。〭】〱〸「〶〈〞〈〪〟〻〝〲〮〆〼〯『〱〡〙〮〕〒〣」〳〥〙〡〡『〇〠〡〭〷〜々』〣」〼々、〗〡『〽〻〽〳〉〄〵〬〽〯〥〾〙〉〿〮〴〷〥〡〰〹〰《〺【〒」〙〾〽。〴〘〕〝 〳」〡〇〩〥〾〆〨〉〫〠〙〤〒【〸々〣〓〰」〈〪〵〠〚】〈〆〵〗〜〦〣〃〼〔〉》〆〞〚〆〄〫〺〽〪々〩〴〵〹〿〔〥〜〩〪〤〗。「〽〨〟、〄〽】〩〙〝〺〶〸〟〯《《〥〣〻『〟〽〮〄》〙〕『」〾〼〷』々〥〒【、〗〔〯】〮 
〹〩】〡〇〟〫〢〨〡〭」〄〼〙〪〻〪々〙。〫〧〪〞〾〄』〟〶〇〞〜〥〘。〝〨〸】〕〔〨〕〾〃〾〒」〈〒〓〼〗〖〕〱〙〘〓〝〾〔【〵〿〖〸〷〵〩【〞々〼〢〧〻〥〰〦〤》〰〛〡。〖〝〙〒〽〜〕〘「。〵〇〒〾〼〽〈〣〇〒〙〢〸、〞〲》〪〰〴 〽〭〷〸〫〆〞〾〨〆〛〔〤〜」〈〨〃〈〴〽〲」。【〞〒〉。〱〕〨〽。 〷』〦》〵〩〪〡〕〞〹〃〧〃〝〢〴。〃〛〭〻〣〸〖〞〻【〛》〜〳〜〟〘〄」〸〬〶〥》〨〭〡〦〇〇《〱】〸〼〺〬〛〓〔」〰〈〧、【〕」〳〼〗〯〉〒〖」〧〩》〴」〺。〰〷』〩〚〭〞〰〶〚〲〙〥〢。〽〵〱」】〓〘〦。〭《〥〙、〱〹〦】〕》〲、〘〓〙〷、〪〕〉〭、〇〜々〖〨〞」〠〕〲〨〕〔〻〿〙〘〙』〼〘〡〢〧〚〢〷〸〰〟〰〗」〪〛【〪〺〒〱〈〦〽、『〥 〙〪〕〝〄〛〣〴〯〆〒〰〜〪〆〠〞〾〃〭〬〡〉】〄〃〥〥〒〶〕〢〵〣〢〨〘〩〹〖〧〒〺〫〕〡〆〭〘〿〠〹〲〔〫》〪〰〇「〯〫〈〾〱〄、〮『》〹〿〿〱〦】〳〰」。【〘〆〞〚〱》〫〷〸〠〲〚〶〷〘〩〯〛〄々 』〪〭〬〖〪〦々〼》〇〤。〉〯〟〮〢〤〬〜〪〬〺〿〹〖〔】〕〖〣 『〵〸》〧〻〺〜〧〯〄"; - final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH, + final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); checkAnalysisConsistency(random, a, random.nextBoolean(), s); @@ -193,7 +193,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase { public void test4thCuriousString() throws Exception { final String s = "\u10b47\u10b58\u0020\u0078\u0077\u0020\u0020\u006c\u0065\u006c\u0066\u0071\u0077\u0071\u0062\u0020\u0079\u0078\u0069\u0020\u101da\u101d5\u101e6\u0020\u0074\u0020\u2c55\u2c18\u2c2d\u2c08\u2c30\u2c3d\u2c4f\u2c1c\u2c1b\u2c1c\u2c41\u0020\u003c\u002f\u0073\u0020\ue22a\u05d9\u05f8\u0168\u723b\ue326\ubf5f0\u0020\u0063\u006a\u0072\u0070\u0061\u006b\u0061\u0071\u0020\u0028\u005b\u003f\u0020\u003f\u003e\u0036\u0030\u0020\u0020\u0065\u0068\u006a\u006b\u0075\u0074\u0020\u0068\u0067\u0020\u0071\u0070\u0068\u007a\u0061\u006a\u0062\u0065\u0074\u0069\u0061\u0020\u006d\u0079\u0079\u0065\u0067\u0063\u0020\u3066\u3082\u308e\u3046\u3059\u0020\u2125\u2120\u212d\u0020\uffbe\uff5c\u0020\u0067\u004c\u0025\u0020\u0020\u2df6\u0020\u006b\u0020\u0066\u006a\u0070\u0061\u006e\u0064\u0020\u0067\u0072\u0073\u0020\u0070\u0064\u0063\u0020\u0625\u0278\u6722d\u2240\ufd27\u006a\u0020\u4df1\u4dee\u0020\u0072\u0065\u0063\u0076\u007a\u006f\u006f\u0020\ue467\u9d3a0\uf0973\u0218\u0638\u0020\u0019\u0050\u4216c\u03e6\u0330\u894c2\u0020\u0072\u006d\u0065\u0020\u006e\u0061\u0020\u0020\u006d\u0075\u0020\u0020\u0063\u006f\u0074\u007a\u0020\u0069\u006a\u0076\u0078\u0062\u0061\u0076\u0020\u1c26\u1c2c\u1c33\u0020\u0067\u0020\u0072\u0068\u0073\u006a\u006e\u0072\u0020\u0064\u003f\u0064\u0020\u0020\u0073\u0073\u0073\u0072\u0020\u0061\u0020\u0076\u0077\u0062\u0020\u007a\u0020\u0077\u0068\u006f\u0062\u0062\u006e\u006f\u0070\u0064\u0020\u0020\u0066\u0073\u0076\u0076\u0070\u0066\u006c\u006c\u0066\u0067\u0020\u006c\u007a\u0065\u0078\u006e\u0020\u006d\u0066\u0020\u005b\u0029\u005b\u0020\u0062\u0076\u0020\u1a12\u1a03\u1a0f\u0020\u0061\u0065\u0067\u006e\u0020\u0056\u2ab09\ufd8b\uf2dc\u0020\u006f\u0020\u003a\u0020\u0020\u0060\u9375\u0020\u0075\u0062\u0020\u006d\u006a\u0078\u0071\u0071\u0020\u0072\u0062\u0062\u0073\u0077\u0078\u0020\u0079\u0020\u0077\u006b\u0065\u006c\u006a\u0020\u470a9\u006d\u8021\ue122\u0020\u0071\u006c\u0020\u0026\u0023\u0036\u0039\u0039\u0020\u0020\u26883\u005d\u006d\ud5a0e\u5167\ue766\u5649\u0020\u1e0c\u1e34\u0020\u0020\u19ae\u19af\u19c3\u19aa\u19da\u0020\uaa68\uaa78\u0020\u0062\u006b\u0064\u006f\u0063\u0067\u0073\u0079\u006f\u0020\u0020\u2563\u2536\u2537\u2579\u253f\u2550\u254c\u251d\u2519\u2538\u0020\u0070\u0073\u0068\u0020\u002a\u0061\u002d\u0028\u005b\u0061\u003f\u0020\u0020\u31f9\u31fc\u31f7\u0020\u0029\u003f\u002b\u005d\u002e\u002a\u0020\u10156\u0020\u0070\u0076\u0077\u0069\u0020\u006e\u006d\u0073\u0077\u0062\u0064\u0063\u0020\u003c\u0020\u0020\u006a\u007a\u0020\u0076\u0020\u0020\u0072\u0069\u0076\u0020\u0020\u03f2\u03d0\u03e3\u0388\u0020\u1124\u11c2\u11e8\u1172\u1175\u0020\uace9\u90ac\ua5af6\u03ac\u0074\u0020\u0065\u006a\
u0070\u006d\u0077\u0073\u0020\ue018a\u0020\u0077\u0062\u0061\u0062\u007a\u0020\u2040\u204f\u0020\u0064\u0776\u6e2b\u0020\u006a\u007a\u006e\u0078\u006f\u0020\u030f\u0334\u0308\u0322\u0361\u0349\u032a\u0020\u006f\u006e\u0020\u0069\u007a\u0072\u0062\u0073\u0066\u0020\u0069\u0079\u0076\u007a\u0069\u0020\u006b\u0068\u0077\u0077\u0064\u0070\u0020\u3133\u3173\u3153\u318c\u0020\u007a\u006c\u006a\u0074\u0020\u0065\u0064\u006b\u0020\u002b\u002e\u003f\u005b\u002d\u0028\u0066\u0029\u0020\u0020\ua490\ua49e\u0020\u1d7cb\u1d59f\u1d714\u0020\u0070\u0075\u0061\u0020\u0068\u0020\u0063\u006e\u0020\u27b1\u271c\u2741\u2735\u2799\u275d\u276d\u271b\u2748\u0020\u55d4\uec30\u1057b4\u0382\u001b\u0047\u0020\uf1a9\u0a76\u002d\u0020\u005d\u005b\u0061\u005d\u002a\u002d\u002b\u0020\u2d05\u2d22\u2d03\u0020\u0073\u0064\u0068\u006b\u0020\u0067\u0079\u0020\u2239\u2271\u22fc\u2293\u22fd\u0020\u002c\u0062\u0031\u0016\uf665\uf0cc\u0020\u0064\u0068\u0074\u0072\u0020\u006b\u006c\u0071\u0061\u006d\u0020\u005b\u005b\u0020\u41dad\u721a\u0020\u39f2\u0020\u0020\u13f4\u13e4\u13a3\u13b8\u13a7\u13b3\u0020\u0049\u0004\u007b\u0020\u13420\u0020\u0020\u2543\u252f\u2566\u2568\u2555\u0020\u007a\u006e\u0067\u0075\u006f\u0077\u0064\u0077\u006f\u0020\u01d4\u0508\u028d\uf680\u6b84\u0029\u0786\u61f73\u0020\u0020\ud7ee\ud7fd\ud7c5\ud7f4\ud7e1\ud7d8\u0020\u8c6d\u182a\u004f\uf0fe\r\u8a64\u0020\u0064\u0077\u0068\u006f\u0072\u0061\u0020\u006b\u006a\u0020\u002b\u002e\u0028\u0063\u0029\u0020\u0071\u0018\u2a0a\ubfdee\u0020\u0020\u0020\u0020\u003b\u0020\u4dda\u0020\u2ac76\u0020\u0072\u0078\u0020\u0020\u0061\u0073\u0020\u0026\u0020\u0068\u0077\u0077\u0070\u0079\u006f\u0020\u25cde\u05b2\uf925\ub17e\u36ced\u002e\u0020\u2e285\ue886\ufd0c\u0025\u0079\ueecb\u0038\u0020\ud03c\u0039\n\uc6339\u0020\u0077\u0074\u0020\u0065\u0069\u0064\u0065\u0020\u0075\u006e\u007a\u006d\u0061\u0074\u0020\u0066\u0064\u007a\u0070\u0020\u13114\u1304d\u131c3\u0020\u006f\u0061\u0067\u0071\u0070\u0067\u0020\u0069\u0020\u1f007\u0020\u0070\u006f\u0020\u002e\u005d\u002a\u0020\u0062\u0075\u0077\u0020\u0020\u0021\u0038\u0020\u006f\u0072\u006f\u0078\u0020\u0070\u0020\u12a2\u0020\u25e1\u25e7\u25be\u25c9\u25c6\u25dd\u0020\u0062\u0062\u0065\u0069\u0020\ua6a7\ua6d4\ua6cd\u0020\u006e\u0063\u0076\u0069\u0020\u003f\u002b\u007c\u0065\u0020\u0075\u0062\u0076\u0065\u0073\u0071\u006d\u006f\u0073\u0020\u0071\u0020\u10282\u0020\u174f\u1742\u1758\u1750\u1757\u1752\u174d\u175f\u0020\u006f\u0020\u0020\u0068\u0077\u0020\u0020\u053a\u0036\u0286\u0037\u0014\u05f1\u0381\ub654\u0020\u006b\u006b\u007a\u0079\u0075\u0020\u0076\u0072\u006d\u006d\u006a\u0020\u0074\u0020\u0075\u0074\u0020\u0639\u0057\u0235\u0020\u006d\u0064\u0061\u006e\u0079\u0020\u003c\u2b7c6\u0020\u0063\u0061\u006d\u0068\u0020\u835f\u0572\u20b2\u0020\u0066\u0068\u006d\u0020\u0071\u0063\u0061\u0079\u0061\u0079\u0070\u0020\u0061\u0063\u006a\u0066\u0066\u0068\u0020\u0077\u0068\u0074\u0074\u006c\u0061\u0020\u0020\u0077\u0064\u0073\u0020\ue0068\u0020\u0019\u0048\u0034\u0020\u0064\u0068\u0077\u0062\u0020\u006e\u006c\u0079\u0061\u0062\u006f\u0074\u0020\u0074\u0065\u0077\u0020\u0063\u006f\u0065\u006a\u0020\u1b3f\u1b7a\u0020\u0020\u0020\u1f00b\u0020\u0020\u0061\u102c5c\ue1b9\u0020\u0071\u0069\u0067\u0066\u0020\u0016\u8e2f\u005f\u0067\ud6c2\u0020\u0073\u0071\u006f\u0020\u006e\u0078\u0066\u0063\u0066\u0064\u0069\u006e\u006e\u0020\u0024\u0078\u59d1\ueacd\u0020\u25367\u07ac\u5652\u0020\u2592\u2588\u0020\u007a\u0068\u006f\u006c\u0078\u006a\u0064\u0020\u0070\u0065\u006a\u0076\u006d\u0079\u0020\u0020\u0066\u0061\u0063\u006f\u0020\u006d\u0072\u006e\u0061\u0070\u0020\u00
62\u0075\u0075\u0020\uf2e2\u07d9\u0020\u1cd1\u1cee\u1cf3\u1cdc\u1cf4\u1ce5\u0020\u006a\u0077\u006b\u007a\u0020\u0079\u006e\u0062\u006c\u0020\u003b\u003e\u003c\u0070\u003e\u003c\u0020\u007a\u006c\u006d\u0020\u0020\u0078\u0062\u0079\u006d\u006b\u0020\u0065\ue74e\u00d7\u5cb6\u0020\u006a\u0062\u0020\u006b\u0067\u0074\u006e\u0071\u0065\u0069\u0075\u006f\u0020\ued7a\uae84f\u0052\ucf09\u0292\u265e\u0456\u0020\u0063\u0064\u006a\u0062\u0075\u0077\u0020\u0020\u10ac\u10f1\u0020\u013a\ue711\u0075\u0000\u0020\u0020\u2b30\u2b25\u2bf3\u2b5d\u2b21\u2b86\u2b45\u0020\ua830\ua83c\ua830\u0020\ue864\uf7ce\uf5c8\uf646\uec28\uf30e\uf8ab\u0020\u31c9\u31e8\u31d6\u0020\u0020\u0074\u0075\u0065\u0070\u0020\u0067\u0078\u0062\u0068\u0071\u0069\u006a\u0020\u1dc2\u0020\u0070\u006b\u006d\u0020\u0020\u50ba6\ue1a9\uc0bb\u59a1\u0020\u2fa3\u2fac\u2f8c\u2f35\u2f5b\u2f7e\u2f62\u2fd8\u2fc7\u2f2b\u0020\u0065\u0064\u0078\u0072\u006e\u0062\u0020\u0073\u0069\u0063\u0073\u0067\u0068\u0061\u0069\u0020\u0020\u0062\u006a\u0066\u0020\u2fae\u2fa4\u2f24\u2f04\u0020\uec10\u4a64e\u0038\uf806\u006d\u4ea8\u0020\u006a\u006c\u0020\u0020\u4deb\u4dc8\u4dd8\u4dc2\u0020\u0020\u1d24d\u1d209\u1d23c\u0020\ue4288\ufdd9\ue4e2\ucd9a\u0014\u006d\u0020\u0020\u31c3\u31e2\u31ed\u31db\u31dc\u0020\u0074\u0079\u0067\u0072\u0020\u003e\u0026\u0023\u0020\u0065\u006c\u0068\u0072\u0065\u006c\u0020\u03fc\u192c\ua9838\u5261b\u0031\u0020\u6e84\u44c1\u0020\u1f016\u0020\uf635\u002f\u0042\u0760\u0020\u006a\u0020\u0064\u006e\u0076\u0020\u0079\u0061\u0079\u0020\u003c\u0020\u058b\uf7e0\ufd7b\u07b7\u0020\u0079\u006d\u0074\u006a\u0020\u006d\u0064\u0069\u0020\u0020\u0072\u007a\u007a\u006e\u0070\u0020\u0020\u0020\u0079\u0071\u0065\u0068\u0072\u0020\u2d1f\u2d09\u2d1e\u2d21\u0020\ua859\ua85d\ua84e\ua84d\ua84a\ua859\ua873\u0020\u0065\u0020\u006c\u0071\u0070\u0074\u0069\u0020\u006a\u0066\u0078\u006b\u0076\u0067\u0071\u0069\u0020\u0026\u0023\u0078\u003c\u002f\u0073\u0020\u002e\u0029\u0020\u10298\u1029c\u1029b\u10293\u0020\ub1c5\u0600\u5fe3\u0632\u05f6\u0020\u003f\u003e\u003c\u0073\u0063\u0072\u0069\u0070\u0020\u2ff8\u2ff5\u0020\u006d\u0068\u0079\u0020\u003c\u0021\u002d\u002d\u0020\ued87\u53f6\u0428\u001d\u616f\uf1bf\u0034\u0020\u0d66\u0d2c\u0d01\u0d57\u0d43\u0020\ufb01\ufb1e\ufb4f\ufb08\u0020\u0076\u0071\u0075\u0020\ufee4\u269a\ued60\ue346\u007d\u0020\u0020\u006e\u006d\u0061\u006c\u0069\u0020\uf928\ufa9c\ufa27\u0020\ufe2d\ufe22\ufe2c\ufe23\ufe2c\ufe2f\ufe23\u0020\u247c\u24cb\u24fe\u2486\u248d\u24e9\u24d8\u0020\ufe68\u0020\u0077\u007a\u006f\u006f\u006d\u0065\u0020\u0007\ufcbd\u3d085\u0020\u0073\u006e\u0073\u0069\u006f\u0020\u1049f\u0020\u0062\u0078\u0020\u0074\u0020\u006f\u0062\u0076\u006e\u0075\u007a\u006e\u0073\u006c\u006f\u0067\u0020\u0026\u0023\u0078\u0035\u0065\u0039\u0039\u0066\u0020\u006f\u0020\u0058\u83d7\uf4d7\u9b59\u0020\u256f1\u0c8e\u005a\u024d\u0055\u0020\u0063\u0078\u006a\u006e\u0063\u006a\u0066\u0020\u0069\u0075\u0020\u0068\u0063\u0079\u0078\u0071\u0076\u006a\u0061\u0020\u13e2\u13ee\u13ef\u13cb\u0020\u0064\u006b\u0079\u0020\u0072\u0073\u006a\u0020\u006a\u0020\u003f\u003f\u0020\u0077\u0020\u0960\u0937\u0921\u0948\u095f\u0930\u0900\u090a\u0020\u0078\u0020\u0063\u0066\u0066\u0063\u0064\u006a\u006f\u0068\u0070\u0020\u002a\u0029\u0020\u0065\u0020\u003c\u003f\u0020\u0020\u0067\u0075\u0070\u007a\u0020\u1cf9\u1cd7\u1cd2\u0020\u024d\u067c\u05a8\u8bbc\ue605\u0647\u0020\u002b\u0020\u0068\u0020\u013f\uf379\uecc3\ue576\u002b\ufff9\uf03f\u0020\u00ab\u00d9\u0092\u0020\u0075\u0069\u0020\u0061\u0073\u0065\u0070\u0068\u0020\u0066\u0071\u0075\u0075\u0078\u0065\u006c\u0020\u1c5d\u1c6d\u0020\u
[… several thousand characters of randomly generated \uXXXX fuzz-test data elided …]";
-    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags());
     Random random = random();
@@ -202,7 +202,7 @@ public class TestJapaneseAnalyzer extends BaseTokenStreamTestCase {
 
   public void test5thCuriousString() throws Exception {
     final String s = "ihcp gyqnaznr [… several thousand characters of randomly generated fuzz-test data elided …] pns ";
-    final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
+    final Analyzer a = new JapaneseAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
                                             JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags());
     Random random = random();
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
index e2397621f63..4c4345b5c56 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ja;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -43,7 +44,7 @@ public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(asSet("あり"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
index a439b8586db..532873c1a31 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import java.io.IOException;
+import java.io.Reader;
 
 /**
  * Tests for {@link JapaneseKatakanaStemFilter}
@@ -64,7 +65,7 @@ public class TestJapaneseKatakanaStemFilter extends BaseTokenStreamTestCase {
   }
 
   public void testKeyword() throws IOException {
-    final CharArraySet exclusionSet = new CharArraySet(asSet("コーヒー"), false);
+    final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("コーヒー"), false);
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
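For reference, a minimal standalone sketch of the version-first construction the two Kuromoji test fixes above rely on. This is an editor's illustration under assumptions, not part of the patch: Version.LUCENE_CURRENT stands in for the pinned release constant a real application should use, and null means "no user dictionary", as in the tests.

    import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
    import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
    import org.apache.lucene.util.Version;

    public class JapaneseAnalyzerSketch {
      public static void main(String[] args) {
        // Version comes first, exactly as the restored test calls show;
        // LUCENE_CURRENT is a placeholder assumption -- pin a release in real code.
        JapaneseAnalyzer a = new JapaneseAnalyzer(Version.LUCENE_CURRENT,
            null,                                   // no user dictionary
            Mode.SEARCH,
            JapaneseAnalyzer.getDefaultStopSet(),
            JapaneseAnalyzer.getDefaultStopTags());
        a.close();
      }
    }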
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
index a6b04fc42e6..7f671ee97c6 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
 
 /**
  * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
@@ -31,10 +32,12 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
  */
 public class MorfologikAnalyzer extends Analyzer {
   private final String dictionary;
+  private final Version version;
 
   /**
    * Builds an analyzer with an explicit dictionary resource.
    *
+   * @param version Lucene compatibility version
    * @param dictionaryResource A constant specifying which dictionary to choose. The
    *        dictionary resource must be named morfologik/dictionaries/{dictionaryResource}.dict
    *        and have an associated .info metadata file. See the Morfologik project
@@ -42,15 +45,16 @@
    *
    * @see "http://morfologik.blogspot.com/"
    */
-  public MorfologikAnalyzer(final String dictionaryResource) {
-    this.dictionary = dictionaryResource;
+  public MorfologikAnalyzer(final Version version, final String dictionaryResource) {
+    this.version = version;
+    this.dictionary = dictionaryResource;
   }
 
   /**
    * Builds an analyzer with the default Morfologik's Polish dictionary.
    */
-  public MorfologikAnalyzer() {
-    this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+  public MorfologikAnalyzer(final Version version) {
+    this(version, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
   }
 
   /**
@@ -65,10 +69,10 @@
    */
   @Override
   protected TokenStreamComponents createComponents(final String field) {
-    final Tokenizer src = new StandardTokenizer();
+    final Tokenizer src = new StandardTokenizer(this.version);
     return new TokenStreamComponents(
         src,
-        new MorfologikFilter(new StandardFilter(src), dictionary));
+        new MorfologikFilter(new StandardFilter(this.version, src), dictionary, this.version));
   }
 }
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index 56c16141663..08b4ce4dd3c 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -49,7 +49,7 @@ public class MorfologikFilter extends TokenFilter {
   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   private final CharsRef scratch = new CharsRef(0);
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
+  private final CharacterUtils charUtils;
 
   private State current;
   private final TokenStream input;
@@ -63,8 +63,8 @@
   /**
    * Creates a filter with the default (Polish) dictionary.
    */
-  public MorfologikFilter(final TokenStream in) {
-    this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+  public MorfologikFilter(final TokenStream in, final Version version) {
+    this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, version);
   }
 
   /**
@@ -72,8 +72,9 @@
    *
    * @param in input token stream.
    * @param dict Dictionary resource from classpath.
+   * @param version Lucene version compatibility for lowercasing.
    */
-  public MorfologikFilter(final TokenStream in, final String dict) {
+  public MorfologikFilter(final TokenStream in, final String dict, final Version version) {
     super(in);
     this.input = in;
@@ -83,6 +84,7 @@
     try {
       me.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader());
       this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
+      this.charUtils = CharacterUtils.getInstance(version);
       this.lemmaList = Collections.emptyList();
     } finally {
       me.setContextClassLoader(cl);
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
index f8d731b1a21..41f09473f32 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
@@ -75,6 +75,6 @@
 
   @Override
   public TokenStream create(TokenStream ts) {
-    return new MorfologikFilter(ts, dictionaryResource);
+    return new MorfologikFilter(ts, dictionaryResource, luceneMatchVersion);
   }
 }
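The three Morfologik hunks above thread a Version through analyzer, filter, and factory. As an illustration only (not from the patch), here is a sketch of wiring the same chain by hand, mirroring what MorfologikAnalyzer.createComponents now does; the sample word and the use of Version.LUCENE_CURRENT are assumptions, and DEFAULT_DICTIONARY_RESOURCE is assumed accessible from application code.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.morfologik.MorfologikFilter;
    import org.apache.lucene.analysis.morfologik.MorfologikFilterFactory;
    import org.apache.lucene.analysis.standard.StandardFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class MorfologikChainSketch {
      public static void main(String[] args) throws IOException {
        Version v = Version.LUCENE_CURRENT;          // placeholder assumption
        Tokenizer src = new StandardTokenizer(v);    // version-first, as restored
        src.setReader(new StringReader("liście"));   // arbitrary Polish sample
        TokenStream ts = new MorfologikFilter(new StandardFilter(v, src),
            MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, v);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString());       // prints returned lemmas
        }
        ts.end();
        ts.close();
      }
    }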
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
index 08c983d68e4..2808caa096e 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
 
 /**
  * TODO: The tests below rely on the order of returned lemmas, which is probably not good.
 */
@@ -36,7 +37,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
   private Analyzer getTestAnalyzer() {
-    return new MorfologikAnalyzer();
+    return new MorfologikAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** Test stemming of single tokens with Morfologik library.
  */
@@ -165,16 +166,18 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
   /** */
   public final void testKeywordAttrTokens() throws IOException {
-    Analyzer a = new MorfologikAnalyzer() {
+    final Version version = TEST_VERSION_CURRENT;
+
+    Analyzer a = new MorfologikAnalyzer(version) {
       @Override
       protected TokenStreamComponents createComponents(String field) {
-        final CharArraySet keywords = new CharArraySet(1, false);
+        final CharArraySet keywords = new CharArraySet(version, 1, false);
         keywords.add("liście");
 
-        final Tokenizer src = new StandardTokenizer();
-        TokenStream result = new StandardFilter(src);
+        final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT);
+        TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
         result = new SetKeywordMarkerFilter(result, keywords);
-        result = new MorfologikFilter(result);
+        result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
 
         return new TokenStreamComponents(src, result);
       }
diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
index a47e64990de..2448629f927 100644
--- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
+++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.phonetic;
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
 import org.apache.commons.codec.Encoder;
@@ -27,6 +28,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Tests {@link PhoneticFilter}
@@ -64,7 +66,7 @@ public class TestPhoneticFilter extends BaseTokenStreamTestCase {
 
   static void assertAlgorithm(Encoder encoder, boolean inject, String input,
       String[] expected) throws Exception {
-    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
     tokenizer.setReader(new StringReader(input));
     PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
     assertTokenStreamContents(filter, expected);
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index c2d3db6d431..b83638fcb30 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -88,7 +88,8 @@ public final class SmartChineseAnalyzer extends Analyzer {
       // make sure it is unmodifiable as we expose it in the outer class
       return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
           .getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE,
-              StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT));
+              StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT,
+              Version.LUCENE_CURRENT));
     }
   }
@@ -148,7 +149,7 @@ public final class SmartChineseAnalyzer extends Analyzer {
     // The porter stemming is too strict, this is not a bug, this is a feature:)
     result = new PorterStemFilter(result);
     if (!stopWords.isEmpty()) {
-      result = new StopFilter(result, stopWords);
+      result = new StopFilter(matchVersion, result, stopWords);
     }
     return new TokenStreamComponents(tokenizer, result);
   }
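The SmartChineseAnalyzer hunk above is the same pattern applied to a stock filter chain. A standalone sketch of that versioned chain, as an editor's illustration under assumptions (Version.LUCENE_CURRENT as a placeholder, English stop words, arbitrary sample text):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.StopAnalyzer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public class VersionedChainSketch {
      public static void main(String[] args) throws IOException {
        Version v = Version.LUCENE_CURRENT;           // placeholder assumption
        Tokenizer tok = new StandardTokenizer(v);
        tok.setReader(new StringReader("The Quick Brown Fox"));
        TokenStream ts = new LowerCaseFilter(v, tok); // version-first filters
        ts = new StopFilter(v, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        ts.reset();
        while (ts.incrementToken()) { /* consume tokens */ }
        ts.end();
        ts.close();
      }
    }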
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 7ac5bc14ef8..9240fbb623a 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.egothor.stemmer.Trie;
 
 /**
@@ -76,7 +77,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
     static {
       try {
         DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class,
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
       } catch (IOException ex) {
         // default set should always be present as it is part of the
         // distribution (JAR)
@@ -96,17 +97,18 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
    */
-  public PolishAnalyzer() {
-    this(DefaultsHolder.DEFAULT_STOP_SET);
+  public PolishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultsHolder.DEFAULT_STOP_SET);
   }
 
   /**
    * Builds an analyzer with the given stop words.
    *
+   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    */
-  public PolishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public PolishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
   }
 
   /**
@@ -114,13 +116,15 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
    * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    *
+   * @param matchVersion lucene compatibility version
    * @param stopwords a stopword set
    * @param stemExclusionSet a set of terms not to be stemmed
    */
-  public PolishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
+  public PolishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
     this.stemTable = DefaultsHolder.DEFAULT_TABLE;
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
   }
 
   /**
@@ -137,10 +141,10 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new StempelFilter(result, new StempelStemmer(stemTable));
diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
index 9744eec3c09..60d894a5461 100644
--- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
@@ -27,12 +27,12 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new PolishAnalyzer();
+    new PolishAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new PolishAnalyzer();
+    Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTerm(a, "studenta", "student");
     checkOneTerm(a, "studenci", "student");
@@ -42,14 +42,15 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    CharArraySet exclusionSet = new CharArraySet(asSet("studenta"), false);;
-    Analyzer a = new PolishAnalyzer(PolishAnalyzer.getDefaultStopSet(), exclusionSet);
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);;
+    Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT,
+        PolishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "studenta", "studenta");
     checkOneTerm(a, "studenci", "student");
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new PolishAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random(), new PolishAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
   }
 }
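Outside the test framework, the restored PolishAnalyzer constructors are called version-first. A hedged sketch, assuming Version.LUCENE_CURRENT as a stand-in and an illustrative exclusion entry:

    import java.util.Arrays;
    import org.apache.lucene.analysis.pl.PolishAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class PolishAnalyzerSketch {
      public static void main(String[] args) {
        Version v = Version.LUCENE_CURRENT;   // placeholder assumption
        // CharArraySet also takes the compatibility version first again.
        CharArraySet noStem = new CharArraySet(v, Arrays.asList("studenta"), false);
        PolishAnalyzer a = new PolishAnalyzer(v,
            PolishAnalyzer.getDefaultStopSet(), noStem);
        a.close();
      }
    }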
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
index fe69924fa80..bcc9b601c92 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
@@ -93,7 +93,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
    * @return array of Lucene queries
    */
   private static Query[] createQueries(List<String> qs, Analyzer a) {
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
     List<Query> queries = new ArrayList<>();
     for (int i = 0; i < qs.size(); i++) {
       try {
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
index d611b5a49b0..0d92db19824 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
@@ -52,7 +52,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
     Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
     String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
-    QueryParser qp = new QueryParser(defaultField, anlzr);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr);
     qp.setAllowLeadingWildcard(true);
 
     List<Query> qq = new ArrayList<>();
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
index 545b0d71e46..e76926e3eac 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
@@ -60,7 +60,7 @@ public class LongToEnglishQueryMaker implements QueryMaker {
   @Override
   public void setConfig(Config config) throws Exception {
     Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", StandardAnalyzer.class.getName()));
-    parser = new QueryParser(DocMaker.BODY_FIELD, anlzr);
+    parser = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, anlzr);
   }
 
   @Override
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
index 6db388a0cb1..259928d270c 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersQueryMaker.java
@@ -73,7 +73,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
    * @return array of Lucene queries
    */
   private static Query[] createQueries(List<String> qs, Analyzer a) {
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
     List<Query> queries = new ArrayList<>();
     for (int i = 0; i < qs.size(); i++) {
       try {
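All four benchmark fixes above restore the same call shape: the classic QueryParser takes the compatibility version ahead of the default field. A minimal hedged sketch; "body" mirrors DocMaker.BODY_FIELD's role only as an assumption for illustration:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    public class QueryParserSketch {
      public static void main(String[] args) throws ParseException {
        QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "body",
            new StandardAnalyzer(Version.LUCENE_CURRENT));
        Query q = qp.parse("+apache +lucene");
        System.out.println(q);   // prints the parsed boolean query
      }
    }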
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
index dc1767235b6..840d2dc3a6e 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleQueryMaker.java
@@ -48,7 +48,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
     Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
 
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD,anlzr);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr);
     ArrayList qq = new ArrayList<>();
     Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2"));
     qq.add(q1);
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
index fb2f6f2ebc9..a0b33c5422c 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
@@ -63,7 +63,7 @@ public class SimpleQQParser implements QualityQueryParser {
   public Query parse(QualityQuery qq) throws ParseException {
     QueryParser qp = queryParser.get();
     if (qp==null) {
-      qp = new QueryParser(indexField, new StandardAnalyzer());
+      qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
       queryParser.set(qp);
     }
     BooleanQuery bq = new BooleanQuery();
diff --git a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
index 81b9061d2eb..bca3a2f1755 100644
--- a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
+++ b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
@@ -57,7 +57,7 @@ public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase

   * In other words, all the query's terms must appear, but it doesn't matter in
   * what fields they appear.
   *
   */
-  public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map boosts) {
-    this(fields, analyzer);
+  public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer, Map boosts) {
+    this(matchVersion, fields, analyzer);
     this.boosts = boosts;
   }
 
@@ -89,8 +90,8 @@ public class MultiFieldQueryParser extends QueryParser
   *
   *
   * In other words, all the query's terms must appear, but it doesn't matter in
   * what fields they appear.
   *
   */
-  public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
-    super(null, analyzer);
+  public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) {
+    super(matchVersion, null, analyzer);
     this.fields = fields;
   }
 
@@ -238,6 +239,7 @@ public class MultiFieldQueryParser extends QueryParser
    * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
    *
    *
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param queries Queries strings to parse
    * @param fields Fields to search on
    * @param analyzer Analyzer to use
@@ -245,13 +247,15 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the queries array differs
    *  from the length of the fields array
    */
-  public static Query parse(String[] queries, String[] fields, Analyzer analyzer) throws ParseException {
+  public static Query parse(Version matchVersion, String[] queries, String[] fields,
+      Analyzer analyzer) throws ParseException
+  {
     if (queries.length != fields.length)
       throw new IllegalArgumentException("queries.length != fields.length");
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++) {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(queries[i]);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -284,6 +288,7 @@ public class MultiFieldQueryParser extends QueryParser
    *
    *
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param query Query string to parse
    * @param fields Fields to search on
    * @param flags Flags describing the fields
@@ -292,13 +297,13 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the fields array differs
    *  from the length of the flags array
    */
-  public static Query parse(String query, String[] fields,
+  public static Query parse(Version matchVersion, String query, String[] fields,
       BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
     if (fields.length != flags.length)
       throw new IllegalArgumentException("fields.length != flags.length");
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++) {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(query);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -332,6 +337,7 @@ public class MultiFieldQueryParser extends QueryParser
    *
    *
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param queries Queries string to parse
    * @param fields Fields to search on
    * @param flags Flags describing the fields
@@ -340,7 +346,7 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the queries, fields,
    *  and flags array differ
    */
-  public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
+  public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags,
       Analyzer analyzer) throws ParseException
   {
     if (!(queries.length == fields.length && queries.length == flags.length))
@@ -348,7 +354,7 @@ public class MultiFieldQueryParser extends QueryParser
 
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++) {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(queries[i]);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
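To make the restored static signatures concrete, a short sketch (hypothetical field names) showing the per-field-queries variant next to the flags variant:

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
  import org.apache.lucene.queryparser.classic.ParseException;
  import org.apache.lucene.search.BooleanClause;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.util.Version;

  public class MultiFieldParseSketch {
    public static void main(String[] args) throws ParseException {
      StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
      String[] fields = { "title", "body" };

      // One query string per field: roughly (title:one) (body:two)
      Query q1 = MultiFieldQueryParser.parse(
          Version.LUCENE_CURRENT, new String[] { "one", "two" }, fields, a);

      // One query string, per-field occurrence flags: +title:term -body:term
      BooleanClause.Occur[] flags = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT };
      Query q2 = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "term", fields, flags, a);

      System.out.println(q1 + "\n" + q2);
    }
  }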
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
index af0db955ecf..2d7e29b29d4 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
@@ -12,6 +12,7 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * This class is generated by JavaCC.  The most important method is
@@ -75,6 +76,14 @@ import org.apache.lucene.search.TermQuery;
  *
  * <p>NOTE: there is a new QueryParser in contrib, which matches
  * the same syntax as this class, but is more modular,
  * enabling substantial customization to how a query is created.
+ *
+ *
+ * <p>NOTE: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * <ul>
+ *    <li>As of 3.1, {@link #setAutoGeneratePhraseQueries} is false by
+ *        default.
+ * </ul>
  */
 public class QueryParser extends QueryParserBase implements QueryParserConstants {
   /** The default operator for parsing queries.
@@ -83,12 +92,13 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
   static public enum Operator { OR, AND }
 
   /** Create a query parser.
+   *  @param matchVersion  Lucene version to match. See above.
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
-  public QueryParser(String f, Analyzer a) {
+  public QueryParser(Version matchVersion, String f, Analyzer a) {
     this(new FastCharStream(new StringReader("")));
-    init(f, a);
+    init(matchVersion, f, a);
   }
 
 // *   Query  ::= ( Clause )*
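The javadoc note above is the point of the revert: callers pick the compatibility behavior explicitly. A sketch of the two typical choices — assumed, not taken from the patch; any Analyzer works in place of StandardAnalyzer:

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryparser.classic.QueryParser;
  import org.apache.lucene.util.Version;

  public class MatchVersionSketch {
    public static void main(String[] args) throws Exception {
      Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
      // Current behavior for a freshly built index:
      QueryParser current = new QueryParser(Version.LUCENE_CURRENT, "field", a);
      // Emulate 4.0-era parsing behavior against an older index:
      QueryParser legacy = new QueryParser(Version.LUCENE_4_0, "field", a);
      System.out.println(current.parse("quick brown fox"));
      System.out.println(legacy.parse("quick brown fox"));
    }
  }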
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
index 19ec6252f2c..200d5e992cb 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
@@ -36,6 +36,7 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * This class is generated by JavaCC.  The most important method is
@@ -99,6 +100,14 @@ import org.apache.lucene.search.TermQuery;
  *
  * <p>NOTE: there is a new QueryParser in contrib, which matches
  * the same syntax as this class, but is more modular,
  * enabling substantial customization to how a query is created.
+ *
+ *
+ * <p>NOTE: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * <ul>
+ *    <li>As of 3.1, {@link #setAutoGeneratePhraseQueries} is false by
+ *        default.
+ * </ul>
  */
 public class QueryParser extends QueryParserBase {
   /** The default operator for parsing queries.
@@ -107,12 +116,13 @@ public class QueryParser extends QueryParserBase {
   static public enum Operator { OR, AND }
 
   /** Create a query parser.
+   *  @param matchVersion  Lucene version to match. See above.
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
-  public QueryParser(String f, Analyzer a) {
+  public QueryParser(Version matchVersion, String f, Analyzer a) {
     this(new FastCharStream(new StringReader("")));
-    init(f, a);
+    init(matchVersion, f, a);
   }
 }
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index d84bbc7a886..b42dd36fc52 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -33,6 +33,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanQuery.TooManyClauses;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
+import org.apache.lucene.util.Version;
 
 /** This class is overridden by QueryParser in QueryParser.jj
  * and acts to separate the majority of the Java code from the .jj grammar file.
@@ -88,10 +89,11 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
   }
 
   /** Initializes a query parser.  Called by the QueryParser constructor
+   *  @param matchVersion  Lucene version to match.
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
-  public void init(String f, Analyzer a) {
+  public void init(Version matchVersion, String f, Analyzer a) {
     setAnalyzer(a);
     field = f;
     setAutoGeneratePhraseQueries(false);
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
index fc5dd7cded6..0e76a1969ef 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@@ -38,6 +38,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * QueryParser which permits complex phrase query syntax eg "(john jon
@@ -79,8 +80,8 @@ public class ComplexPhraseQueryParser extends QueryParser {
 
   private ComplexPhraseQuery currentPhraseQuery = null;
 
-  public ComplexPhraseQueryParser(String f, Analyzer a) {
-    super(f, a);
+  public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) {
+    super(matchVersion, f, a);
   }
 
   @Override
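A sketch of the restored ComplexPhraseQueryParser constructor in use, with the two knobs the tests below exercise; the field name and query string are illustrative only:

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.util.Version;

  public class ComplexPhraseSketch {
    public static Query build() throws Exception {
      ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(
          Version.LUCENE_CURRENT, "name", new StandardAnalyzer(Version.LUCENE_CURRENT));
      qp.setInOrder(true);        // phrase terms must match in order
      qp.setFuzzyPrefixLength(1); // "usually a good idea", per the test below
      return qp.parse("\"(john jon jonathan~) smith\"");
    }
  }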
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java
index 37398da4e4b..8c0e26b2548 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java
@@ -84,18 +84,24 @@ public class ExtendableQueryParser extends QueryParser {
   /**
    * Creates a new {@link ExtendableQueryParser} instance
    *
+   * @param matchVersion
+   *          the lucene version to use.
    * @param f
    *          the default query field
    * @param a
    *          the analyzer used to find terms in a query string
    */
-  public ExtendableQueryParser(final String f, final Analyzer a) {
-    this(f, a, DEFAULT_EXTENSION);
+  public ExtendableQueryParser(final Version matchVersion, final String f,
+      final Analyzer a) {
+    this(matchVersion, f, a, DEFAULT_EXTENSION);
+  }
 
   /**
    * Creates a new {@link ExtendableQueryParser} instance
    *
+   * @param matchVersion
+   *          the lucene version to use.
    * @param f
    *          the default query field
    * @param a
@@ -103,9 +109,9 @@ public class ExtendableQueryParser extends QueryParser {
    * @param ext
    *          the query parser extensions
    */
-  public ExtendableQueryParser(final String f,
+  public ExtendableQueryParser(final Version matchVersion, final String f,
       final Analyzer a, final Extensions ext) {
-    super(f, a);
+    super(matchVersion, f, a);
     this.defaultField = f;
     this.extensions = ext;
   }
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java
index 21e9eeb238f..424d2c7571a 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/UserInputQueryBuilder.java
@@ -86,7 +86,7 @@ public class UserInputQueryBuilder implements QueryBuilder {
    * @return QueryParser
    */
   protected QueryParser createQueryParser(String fieldName, Analyzer analyzer) {
-    return new QueryParser(fieldName, analyzer);
+    return new QueryParser(Version.LUCENE_CURRENT, fieldName, analyzer);
   }
 
 }
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
index 3c941bb8c0c..9d060f1d8ac 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
@@ -120,7 +120,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase {
     assertEquals("Should have returned nothing", true, ex);
     ex = false;
 
-    AnalyzingQueryParser qp = new AnalyzingQueryParser(FIELD, a);
+    AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
     try{
       qp.analyzeSingleChunk(FIELD, "", "not a single chunk");
     } catch (ParseException e){
@@ -212,7 +212,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase {
   }
 
   private Query getAnalyzedQuery(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
-    AnalyzingQueryParser qp = new AnalyzingQueryParser(FIELD, a);
+    AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
     qp.setAllowLeadingWildcard(allowLeadingWildcard);
     org.apache.lucene.search.Query q = qp.parse(s);
     return q;
@@ -264,7 +264,7 @@ public class TestAnalyzingQueryParser extends LuceneTestCase {
   public void testByteTerms() throws Exception {
     String s = "เข";
     Analyzer analyzer = new MockBytesAnalyzer();
-    QueryParser qp = new AnalyzingQueryParser(FIELD, analyzer);
+    QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer);
     Query q = qp.parse("[เข TO เข]");
     assertEquals(true, isAHit(q, s, analyzer));
   }
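The extendable parser gains the same leading Version argument; a sketch, where the empty Extensions registry is a placeholder (a real caller would bind ParserExtension instances to field suffixes — the no-arg Extensions() constructor is assumed here):

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryparser.ext.ExtendableQueryParser;
  import org.apache.lucene.queryparser.ext.Extensions;
  import org.apache.lucene.util.Version;

  public class ExtendableSketch {
    public static ExtendableQueryParser build() {
      Extensions ext = new Extensions(); // no extensions registered in this sketch
      return new ExtendableQueryParser(Version.LUCENE_CURRENT, "content",
          new StandardAnalyzer(Version.LUCENE_CURRENT), ext);
    }
  }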
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
index 67bdde4d2db..1fa596a8f4f 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
@@ -40,7 +40,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
 
   public void testMultiAnalyzer() throws ParseException {
 
-    QueryParser qp = new QueryParser("", new MultiAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer());
 
     // trivial, no multiple tokens:
     assertEquals("foo", qp.parse("foo").toString());
@@ -113,7 +113,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testPosIncrementAnalyzer() throws ParseException {
-    QueryParser qp = new QueryParser("", new PosIncrementAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_4_0, "", new PosIncrementAnalyzer());
     assertEquals("quick brown", qp.parse("the quick brown").toString());
     assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());
   }
@@ -234,7 +234,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   private final static class DumbQueryParser extends QueryParser {
 
     public DumbQueryParser(String f, Analyzer a) {
-      super(f, a);
+      super(TEST_VERSION_CURRENT, f, a);
     }
 
     /** expose super's version */
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
index ae3b50b4ba5..80f4e3d6edc 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
@@ -62,18 +62,18 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     String[] fields = {"b", "t"};
     Occur occur[] = {Occur.SHOULD, Occur.SHOULD};
     TestQueryParser.QPTestAnalyzer a = new TestQueryParser.QPTestAnalyzer();
 
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, a);
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, a);
     Query q = mfqp.parse(qtxt);
     assertEquals(expectedRes, q.toString());
 
-    q = MultiFieldQueryParser.parse(qtxt, fields, occur, a);
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, qtxt, fields, occur, a);
     assertEquals(expectedRes, q.toString());
   }
 
   public void testSimple() throws Exception {
     String[] fields = {"b", "t"};
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random()));
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random()));
 
     Query q = mfqp.parse("one");
     assertEquals("b:one t:one", q.toString());
@@ -136,7 +136,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     boosts.put("b", Float.valueOf(5));
     boosts.put("t", Float.valueOf(10));
     String[] fields = {"b", "t"};
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random()), boosts);
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random()), boosts);
 
 
     //Check for simple
@@ -162,24 +162,24 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
   public void testStaticMethod1() throws ParseException {
     String[] fields = {"b", "t"};
     String[] queries = {"one", "two"};
-    Query q = MultiFieldQueryParser.parse(queries, fields, new MockAnalyzer(random()));
+    Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, new MockAnalyzer(random()));
     assertEquals("b:one t:two", q.toString());
 
     String[] queries2 = {"+one", "+two"};
-    q = MultiFieldQueryParser.parse(queries2, fields, new MockAnalyzer(random()));
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries2, fields, new MockAnalyzer(random()));
     assertEquals("(+b:one) (+t:two)", q.toString());
 
     String[] queries3 = {"one", "+two"};
-    q = MultiFieldQueryParser.parse(queries3, fields, new MockAnalyzer(random()));
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries3, fields, new MockAnalyzer(random()));
     assertEquals("b:one (+t:two)", q.toString());
 
     String[] queries4 = {"one +more", "+two"};
-    q = MultiFieldQueryParser.parse(queries4, fields, new MockAnalyzer(random()));
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries4, fields, new MockAnalyzer(random()));
     assertEquals("(b:one +b:more) (+t:two)", q.toString());
 
     String[] queries5 = {"blah"};
     try {
-      q = MultiFieldQueryParser.parse(queries5, fields, new MockAnalyzer(random()));
+      q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries5, fields, new MockAnalyzer(random()));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs
@@ -189,11 +189,11 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     TestQueryParser.QPTestAnalyzer stopA = new TestQueryParser.QPTestAnalyzer();
 
     String[] queries6 = {"((+stop))", "+((stop))"};
-    q = MultiFieldQueryParser.parse(queries6, fields, stopA);
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries6, fields, stopA);
     assertEquals("", q.toString());
 
     String[] queries7 = {"one ((+stop)) +more", "+((stop)) +two"};
-    q = MultiFieldQueryParser.parse(queries7, fields, stopA);
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries7, fields, stopA);
     assertEquals("(b:one +b:more) (+t:two)", q.toString());
 
   }
@@ -201,15 +201,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
   public void testStaticMethod2() throws ParseException {
     String[] fields = {"b", "t"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
-    Query q = MultiFieldQueryParser.parse("one", fields, flags, new MockAnalyzer(random()));
+    Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random()));
     assertEquals("+b:one -t:one", q.toString());
 
-    q = MultiFieldQueryParser.parse("one two", fields, flags, new MockAnalyzer(random()));
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random()));
     assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());
 
     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse("blah", fields, flags2, new MockAnalyzer(random()));
+      q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random()));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs
@@ -221,15 +221,15 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     //int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
 
-    Query q = MultiFieldQueryParser.parse("one", fields, flags, new MockAnalyzer(random()));//, fields, flags, new MockAnalyzer(random));
+    Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random()));//, fields, flags, new MockAnalyzer(random));
     assertEquals("+b:one -t:one", q.toString());
 
-    q = MultiFieldQueryParser.parse("one two", fields, flags, new MockAnalyzer(random()));
+    q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random()));
     assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());
 
     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse("blah", fields, flags2, new MockAnalyzer(random()));
+      q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random()));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs
@@ -241,12 +241,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     String[] fields = {"f1", "f2", "f3"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST,
         BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD};
-    Query q = MultiFieldQueryParser.parse(queries, fields, flags, new MockAnalyzer(random()));
+    Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random()));
     assertEquals("+f1:one -f2:two f3:three", q.toString());
 
     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse(queries, fields, flags2, new MockAnalyzer(random()));
+      q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random()));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs
@@ -257,12 +257,12 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     String[] queries = {"one", "two"};
     String[] fields = {"b", "t"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
-    Query q = MultiFieldQueryParser.parse(queries, fields, flags, new MockAnalyzer(random()));
+    Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random()));
     assertEquals("+b:one -t:two", q.toString());
 
     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse(queries, fields, flags2, new MockAnalyzer(random()));
+      q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random()));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs
@@ -271,7 +271,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
 
   public void testAnalyzerReturningNull() throws ParseException {
     String[] fields = new String[] { "f1", "f2", "f3" };
-    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new AnalyzerReturningNull());
+    MultiFieldQueryParser parser = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new AnalyzerReturningNull());
     Query q = parser.parse("bla AND blo");
     assertEquals("+(f2:bla f3:bla) +(f2:blo f3:blo)", q.toString());
     // the following queries are not affected as their terms are not analyzed anyway:
@@ -293,7 +293,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     iw.shutdown();
 
     MultiFieldQueryParser mfqp =
-      new MultiFieldQueryParser(new String[] {"body"}, analyzer);
+      new MultiFieldQueryParser(TEST_VERSION_CURRENT, new String[] {"body"}, analyzer);
     mfqp.setDefaultOperator(QueryParser.Operator.AND);
     Query q = mfqp.parse("the footest");
     IndexReader ir = DirectoryReader.open(ramDir);
@@ -334,7 +334,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
 
   public void testSimpleRegex() throws ParseException {
     String[] fields = new String[] {"a", "b"};
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random()));
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random()));
 
     BooleanQuery bq = new BooleanQuery(true);
     bq.add(new RegexpQuery(new Term("a", "[a-z][123]")), Occur.SHOULD);
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
index 73c6c34ad20..9f986c83099 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
@@ -97,7 +97,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
       new TokenAndPos("c", 2)
     };
 
-    QueryParser qp = new QueryParser("field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
     Query q = qp.parse("\"this text is acually ignored\"");
     assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
index 472f1f4b56e..150733efed8 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -45,7 +46,7 @@ public class TestQueryParser extends QueryParserTestBase {
   public static class QPTestParser extends QueryParser {
     public QPTestParser(String f, Analyzer a) {
-      super(f, a);
+      super(TEST_VERSION_CURRENT, f, a);
     }
 
     @Override
@@ -63,7 +64,7 @@ public class TestQueryParser extends QueryParserTestBase {
   public QueryParser getParser(Analyzer a) throws Exception {
     if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
-    QueryParser qp = new QueryParser(getDefaultField(), a);
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, getDefaultField(), a);
     qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
     return qp;
   }
@@ -170,7 +171,7 @@ public class TestQueryParser extends QueryParserTestBase {
   }
 
   public void testFuzzySlopeExtendability() throws ParseException {
-    QueryParser qp = new QueryParser("a",  new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a",  new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
 
       @Override
       Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
@@ -194,7 +195,7 @@ public class TestQueryParser extends QueryParserTestBase {
   @Override
   public void testStarParsing() throws Exception {
     final int[] type = new int[1];
-    QueryParser qp = new QueryParser("field",
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
         new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
       @Override
      protected Query getWildcardQuery(String field, String termStr) {
@@ -281,7 +282,7 @@ public class TestQueryParser extends QueryParserTestBase {
     Analyzer morePrecise = new Analyzer2();
 
     public SmartQueryParser() {
-      super("field", new Analyzer1());
+      super(TEST_VERSION_CURRENT, "field", new Analyzer1());
     }
 
     @Override
@@ -295,7 +296,7 @@ public class TestQueryParser extends QueryParserTestBase {
   @Override
   public void testNewFieldQuery() throws Exception {
     /** ordinary behavior, synonyms form uncoordinated boolean query */
-    QueryParser dumb = new QueryParser("field",
+    QueryParser dumb = new QueryParser(TEST_VERSION_CURRENT, "field",
         new Analyzer1());
     BooleanQuery expanded = new BooleanQuery(true);
     expanded.add(new TermQuery(new Term("field", "dogs")),
@@ -332,7 +333,7 @@ public class TestQueryParser extends QueryParserTestBase {
     BooleanQuery expected = new BooleanQuery(true);
     expected.add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD);
     expected.add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD);
-    QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
     assertEquals(expected, qp.parse("dogs"));
     assertEquals(expected, qp.parse("\"dogs\""));
     qp.setDefaultOperator(Operator.AND);
@@ -348,7 +349,7 @@ public class TestQueryParser extends QueryParserTestBase {
     MultiPhraseQuery expected = new MultiPhraseQuery();
     expected.add(new Term("field", "old"));
     expected.add(new Term[] { new Term("field", "dogs"), new Term("field", "dog") });
-    QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
     assertEquals(expected, qp.parse("\"old dogs\""));
     qp.setDefaultOperator(Operator.AND);
     assertEquals(expected, qp.parse("\"old dogs\""));
@@ -402,7 +403,7 @@ public class TestQueryParser extends QueryParserTestBase {
     BooleanQuery expected = new BooleanQuery(true);
     expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
     expected.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     assertEquals(expected, qp.parse("国"));
     qp.setDefaultOperator(Operator.AND);
     assertEquals(expected, qp.parse("国"));
@@ -418,7 +419,7 @@ public class TestQueryParser extends QueryParserTestBase {
     inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
     inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
     expected.add(inner, BooleanClause.Occur.SHOULD);
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     assertEquals(expected, qp.parse("中国"));
     expected.setBoost(2.0f);
     assertEquals(expected, qp.parse("中国^2"));
@@ -436,7 +437,7 @@ public class TestQueryParser extends QueryParserTestBase {
     inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
     inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
     expected.add(inner2, BooleanClause.Occur.SHOULD);
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     assertEquals(expected, qp.parse("中国国"));
     expected.setBoost(2.0f);
     assertEquals(expected, qp.parse("中国国^2"));
@@ -450,7 +451,7 @@ public class TestQueryParser extends QueryParserTestBase {
     inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
     inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
     expected.add(inner, BooleanClause.Occur.MUST);
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     qp.setDefaultOperator(Operator.AND);
     assertEquals(expected, qp.parse("中国"));
     expected.setBoost(2.0f);
@@ -469,7 +470,7 @@ public class TestQueryParser extends QueryParserTestBase {
     inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
     inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
     expected.add(inner2, BooleanClause.Occur.MUST);
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     qp.setDefaultOperator(Operator.AND);
     assertEquals(expected, qp.parse("中国国"));
     expected.setBoost(2.0f);
@@ -481,7 +482,7 @@ public class TestQueryParser extends QueryParserTestBase {
     MultiPhraseQuery expected = new MultiPhraseQuery();
     expected.add(new Term("field", "中"));
     expected.add(new Term[] { new Term("field", "国"), new Term("field", "國")});
-    QueryParser qp = new QueryParser("field", new MockCJKSynonymAnalyzer());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
     qp.setDefaultOperator(Operator.AND);
     assertEquals(expected, qp.parse("\"中国\""));
     expected.setBoost(2.0f);
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
index 4d0950c5adc..b82748d0702 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
@@ -87,7 +87,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   private void checkBadQuery(String qString) {
-    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
     qp.setInOrder(inOrder);
     Throwable expected = null;
     try {
@@ -101,7 +101,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
 
   private void checkMatches(String qString, String expectedVals)
       throws Exception {
-    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
     qp.setInOrder(inOrder);
     qp.setFuzzyPrefixLength(1); // usually a good idea
 
@@ -141,7 +141,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   public void testHashcodeEquals() throws Exception {
-    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
     qp.setInOrder(true);
     qp.setFuzzyPrefixLength(1);
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
index a2899d8a5e5..4d6bba4cc73 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
@@ -48,8 +48,8 @@ public class TestExtendableQueryParser extends TestQueryParser {
     if (a == null)
       a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     QueryParser qp = extensions == null ? new ExtendableQueryParser(
-        getDefaultField(), a) : new ExtendableQueryParser(
-        getDefaultField(), a, extensions);
+        TEST_VERSION_CURRENT, getDefaultField(), a) : new ExtendableQueryParser(
+        TEST_VERSION_CURRENT, getDefaultField(), a, extensions);
     qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
     return qp;
   }
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index b0f40014bc1..040258fffa3 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -333,13 +333,13 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
   }
 
   public void testSuggestStopFilter() throws Exception {
-    final CharArraySet stopWords = StopFilter.makeStopSet("a");
+    final CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "a");
     Analyzer indexAnalyzer = new Analyzer() {
         @Override
         protected TokenStreamComponents createComponents(String fieldName) {
           MockTokenizer tokens = new MockTokenizer();
           return new TokenStreamComponents(tokens,
-                                           new StopFilter(tokens, stopWords));
+                                           new StopFilter(TEST_VERSION_CURRENT, tokens, stopWords));
         }
       };
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
index 45883582451..dca3193a8cb 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
@@ -46,7 +46,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
 
     File tempDir = createTempDir("BlendedInfixSuggesterTest");
 
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
     BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
         AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
         BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
@@ -84,7 +84,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     };
 
     File tempDir = createTempDir("BlendedInfixSuggesterTest");
 
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // BlenderType.LINEAR is used by default (remove position*10%)
     BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a);
@@ -125,7 +125,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     };
 
     File tempDir = createTempDir("BlendedInfixSuggesterTest");
 
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
     BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
@@ -175,7 +175,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     };
 
     File tempDir = createTempDir("BlendedInfixSuggesterTest");
 
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
 
     // if factor is small, we don't get the expected element
     BlendedInfixSuggester suggester = new BlendedInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(tempDir), a, a,
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
index aaf6605a5d3..6d1bfb19c42 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
@@ -244,8 +244,8 @@ public class TestFreeTextSuggester extends LuceneTestCase {
         @Override
         public TokenStreamComponents createComponents(String field) {
           Tokenizer tokenizer = new MockTokenizer();
-          CharArraySet stopSet = StopFilter.makeStopSet("of");
-          return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
+          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
+          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
         }
       };
 
@@ -272,8 +272,8 @@ public class TestFreeTextSuggester extends LuceneTestCase {
         @Override
         public TokenStreamComponents createComponents(String field) {
           Tokenizer tokenizer = new MockTokenizer();
-          CharArraySet stopSet = StopFilter.makeStopSet("of");
-          return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
+          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
+          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
         }
       };
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
index e42342a1ee8..d57a077f84a 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testEndNotStopWord() throws Exception {
-    CharArraySet stopWords = StopFilter.makeStopSet("to");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to"));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
@@ -47,7 +47,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testEndIsStopWord() throws Exception {
 
-    CharArraySet stopWords = StopFilter.makeStopSet("to");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to "));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
@@ -66,7 +66,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testMidStopWord() throws Exception {
 
-    CharArraySet stopWords = StopFilter.makeStopSet("to");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to school"));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
@@ -86,7 +86,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testMultipleStopWords() throws Exception {
 
-    CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to a the school"));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
@@ -106,7 +106,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testMultipleStopWordsEnd() throws Exception {
 
-    CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to a the"));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
@@ -126,7 +126,7 @@ public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
 
   public void testMultipleStopWordsEnd2() throws Exception {
 
-    CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
+    CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to", "the", "a");
     Tokenizer stream = new MockTokenizer();
     stream.setReader(new StringReader("go to a the "));
     TokenStream filter = new SuggestStopFilter(stream, stopWords);
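The suggester tests above all build the same shape of chain; a sketch with core classes, assuming this trunk revision's constructor shapes (a Version-only WhitespaceTokenizer constructor, with the reader attached via setReader, as the tests do with MockTokenizer):

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.core.StopFilter;
  import org.apache.lucene.analysis.core.WhitespaceTokenizer;
  import org.apache.lucene.analysis.util.CharArraySet;
  import org.apache.lucene.util.Version;

  public class StopFilterSketch {
    public static TokenStream build(String text) throws IOException {
      // makeStopSet and StopFilter take the leading Version restored by this patch.
      CharArraySet stopWords = StopFilter.makeStopSet(Version.LUCENE_CURRENT, "to", "the", "a");
      WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_CURRENT);
      tok.setReader(new StringReader(text));
      return new StopFilter(Version.LUCENE_CURRENT, tok, stopWords);
    }
  }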
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index ea9ebefebfb..77de7ecf5cc 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -675,7 +675,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
       IndexSchema schema = core.getLatestSchema();
       String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType");
       FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
-      Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer()
+      Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion)
                                             : fieldType.getQueryAnalyzer();
       //TODO: There's got to be a better way!  Where's Spring when you need it?
       queryConverter.setAnalyzer(analyzer);
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
index d74bea77c4e..1627fc7c6ab 100644
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
@@ -82,7 +82,7 @@ public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory {
       // which is slightly inefficient to do for every instance of the managed filter
       // but ManagedResource's don't have access to the luceneMatchVersion
       boolean ignoreCase = args.getBooleanArg("ignoreCase");
-      stopWords = new CharArraySet(managedWords.size(), ignoreCase);
+      stopWords = new CharArraySet(luceneMatchVersion, managedWords.size(), ignoreCase);
       stopWords.addAll(managedWords);
     }
 
@@ -94,6 +94,6 @@ public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory {
     if (stopWords == null) {
       throw new IllegalStateException("Managed stopwords not initialized correctly!");
     }
-    return new StopFilter(input, stopWords);
+    return new StopFilter(luceneMatchVersion, input, stopWords);
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
index 7d0165f130b..0cf9bc6b771 100644
--- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
@@ -88,7 +88,7 @@ class ComplexPhraseQParser extends QParser {
       defaultField = getReq().getSchema().getDefaultSearchFieldName();
     }
 
-    lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
+    lparser = new ComplexPhraseQueryParser(getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, getReq().getSchema().getQueryAnalyzer());
 
     if (localParams != null)
       inOrder = localParams.getBool("inOrder", inOrder);
diff --git a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
index ddde5004654..01f01a39de9 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
@@ -74,7 +74,7 @@ public abstract class SolrSpellChecker {
       analyzer = fieldType.getQueryAnalyzer();
     }
     if (analyzer == null)   {
-      analyzer = new WhitespaceAnalyzer();
+      analyzer = new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion);
     }
     return name;
   }
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
index ff90e0dfe4c..ae157769051 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
@@ -37,11 +37,11 @@
-
-
+
+
 
-
-
+
+
 
@@ -49,8 +49,8 @@
 
-
-
+
+
 
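On the Solr side the Version comes from solrconfig.xml's luceneMatchVersion, reached through the core's SolrConfig exactly as the SpellCheckComponent and SolrSpellChecker hunks above do; a sketch (a SolrCore reference is assumed to be in scope, as it would be in a SolrCoreAware component):

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
  import org.apache.lucene.util.Version;
  import org.apache.solr.core.SolrCore;

  public class SolrVersionSketch {
    static Analyzer fallbackAnalyzer(SolrCore core) {
      Version v = core.getSolrConfig().luceneMatchVersion; // from solrconfig.xml
      return new WhitespaceAnalyzer(v);
    }
  }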
diff --git a/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java b/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
index a544f86392a..3ae0dc2db47 100644
--- a/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
+++ b/solr/core/src/test/org/apache/solr/analysis/TestLuceneMatchVersion.java
@@ -23,7 +23,7 @@ import org.apache.solr.core.Config;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.FieldType;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.tr.TurkishAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util.Version;
 import org.junit.BeforeClass;
 
@@ -56,18 +56,18 @@ public class TestLuceneMatchVersion extends SolrTestCaseJ4 {
     assertEquals(Version.LUCENE_4_0, (ana.getTokenizerFactory()).getLuceneMatchVersion());
     assertEquals(Version.LUCENE_5_0, (ana.getTokenFilterFactories()[2]).getLuceneMatchVersion());
 
-    // this is a hack to get the private matchVersion field in TurkishAnalyzer's class, may break in later lucene versions - we have no getter :(
-    final Field matchVersionField = TurkishAnalyzer.class.getDeclaredField("matchVersion");
+    // this is a hack to get the private matchVersion field in StandardAnalyzer's superclass, may break in later lucene versions - we have no getter :(
+    final Field matchVersionField = StandardAnalyzer.class.getSuperclass().getDeclaredField("matchVersion");
     matchVersionField.setAccessible(true);
 
-    type = schema.getFieldType("textTurkishAnalyzerDefault");
+    type = schema.getFieldType("textStandardAnalyzerDefault");
     Analyzer ana1 = type.getIndexAnalyzer();
-    assertTrue(ana1 instanceof TurkishAnalyzer);
+    assertTrue(ana1 instanceof StandardAnalyzer);
     assertEquals(DEFAULT_VERSION, matchVersionField.get(ana1));
 
-    type = schema.getFieldType("textTurkishAnalyzer40");
+    type = schema.getFieldType("textStandardAnalyzer40");
     ana1 = type.getIndexAnalyzer();
-    assertTrue(ana1 instanceof TurkishAnalyzer);
+    assertTrue(ana1 instanceof StandardAnalyzer);
     assertEquals(Version.LUCENE_4_0, matchVersionField.get(ana1));
   }
 }
diff --git a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
index 0cba95bc851..7bbc46c2824 100644
--- a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
+++ b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
@@ -115,7 +115,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{
     Directory dir = newFSDirectory(newDir);
     IndexWriter iw = new IndexWriter(
         dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer())
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))
     );
     Document doc = new Document();
     doc.add(new TextField("id", "2", Field.Store.YES));
diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
index c0a838cbc8a..2b84608130f 100644
--- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
+++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
@@ -172,7 +172,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
   @Test
   public void testTermOffsetsTokenStream() throws Exception {
     String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
-    Analyzer a1 = new WhitespaceAnalyzer();
+    Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
     TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
     tokenStream.reset();
 
@@ -180,7 +180,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
         tokenStream);
     for( String v : multivalued ){
       TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
-      Analyzer a2 = new WhitespaceAnalyzer();
+      Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
       TokenStream ts2 = a2.tokenStream("", v);
       ts2.reset();
diff --git a/solr/core/src/test/org/apache/solr/search/TestSort.java b/solr/core/src/test/org/apache/solr/search/TestSort.java
index 601ea5faa22..d1a909bdde6 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSort.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSort.java
@@ -189,7 +189,7 @@ public class TestSort extends SolrTestCaseJ4 {
     for (int iterCnt = 0; iterCnt

   Collection convert(String origQuery) {
     Collection result = new HashSet<>();
-    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
+    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT);
 
     try (TokenStream ts = analyzer.tokenStream("", origQuery)) {
       // TODO: support custom attributes
diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
index 7f1cd737c54..bccdbbcf56b 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
@@ -40,7 +40,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase {
   public void test() throws Exception {
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
-    converter.setAnalyzer(new WhitespaceAnalyzer());
+    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
     Collection tokens = converter.convert("field:foo");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
     assertTrue("tokens Size: " + tokens.size() + " is not: " + 1, tokens.size() == 1);
@@ -50,7 +50,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase {
   public void testSpecialChars() {
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
-    converter.setAnalyzer(new WhitespaceAnalyzer());
+    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
     String original = "field_with_underscore:value_with_underscore";
     Collection tokens = converter.convert(original);
     assertTrue("tokens is null and it shouldn't be", tokens != null);
@@ -96,7 +96,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase {
   public void testUnicode() {
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
-    converter.setAnalyzer(new WhitespaceAnalyzer());
+    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 
     // chinese text value
     Collection tokens = converter.convert("text_field:我购买了道具和服装。");
@@ -116,7 +116,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase {
   public void testMultipleClauses() {
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
-    converter.setAnalyzer(new WhitespaceAnalyzer());
+    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 
     // two field:value pairs should give two tokens
     Collection tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
@@ -133,7 +133,7 @@ public class SpellingQueryConverterTest extends LuceneTestCase {
   public void testRequiredOrProhibitedFlags() {
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
-    converter.setAnalyzer(new WhitespaceAnalyzer());
+    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 
     {
       List tokens = new ArrayList<>(converter.convert("aaa bbb ccc"));
diff --git a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
index 2fb2423f859..fb5c2b477fc 100644
--- a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
+++ b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
@@ -52,8 +52,8 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
         Tokenizer tokenizer = new KeywordTokenizer();
         TokenStream filter = new PatternReplaceFilter(tokenizer,
             Pattern.compile("([^\\p{L}\\p{M}\\p{N}\\p{Cs}]*[\\p{L}\\p{M}\\p{N}\\p{Cs}\\_]+:)|([^\\p{L}\\p{M}\\p{N}\\p{Cs}])+"), " ", true);
-        filter = new LowerCaseFilter(filter);
-        filter = new TrimFilter(filter);
+        filter = new LowerCaseFilter(TEST_VERSION_CURRENT, filter);
+        filter = new TrimFilter(TEST_VERSION_CURRENT, filter);
         return new TokenStreamComponents(tokenizer, filter);
       }
     });