diff --git a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java index 3278a339837..8a9f08dcf47 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.ASCIIFoldingFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/src/main/java/org/elasticsearch/index/analysis/Analysis.java index 4ccedab918f..16dd6e3053f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/Analysis.java +++ b/src/main/java/org/elasticsearch/index/analysis/Analysis.java @@ -20,10 +20,8 @@ package org.elasticsearch.index.analysis; import com.google.common.base.Charsets; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterators; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer; @@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -78,18 +77,20 @@ public class Analysis { return value != null && "_none_".equals(value); } - public static Set parseStemExclusion(Settings settings, Set defaultStemExclusion) { + public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) { String value = settings.get("stem_exclusion"); if (value != null) { if ("_none_".equals(value)) { - return ImmutableSet.of(); + return CharArraySet.EMPTY_SET; } else { - return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value)); + // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)? + return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false); } } String[] stopWords = settings.getAsArray("stem_exclusion", null); if (stopWords != null) { - return ImmutableSet.copyOf(Iterators.forArray(stopWords)); + // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)? + return new CharArraySet(version, ImmutableList.of(stopWords), false); } else { return defaultStemExclusion; } @@ -125,7 +126,7 @@ public class Analysis { .put("_turkish_", TurkishAnalyzer.getDefaultStopSet()) .immutableMap(); - public static Set parseArticles(Environment env, Settings settings, Version version) { + public static CharArraySet parseArticles(Environment env, Settings settings, Version version) { String value = settings.get("articles"); if (value != null) { if ("_none_".equals(value)) { @@ -146,18 +147,22 @@ public class Analysis { return null; } - public static Set parseStopWords(Environment env, Settings settings, Set defaultStopWords, Version version) { + public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) { + return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false)); + } + + public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) { String value = settings.get("stopwords"); if (value != null) { if ("_none_".equals(value)) { return CharArraySet.EMPTY_SET; } else { - return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false)); + return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case); } } String[] stopWords = settings.getAsArray("stopwords", null); if (stopWords != null) { - CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false)); + CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case); for (String stopWord : stopWords) { if (namedStopWords.containsKey(stopWord)) { setStopWords.addAll(namedStopWords.get(stopWord)); @@ -169,7 +174,7 @@ public class Analysis { } List pathLoadedStopWords = getWordList(env, settings, "stopwords"); if (pathLoadedStopWords != null) { - CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false)); + CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case); for (String stopWord : pathLoadedStopWords) { if (namedStopWords.containsKey(stopWord)) { setStopWords.addAll(namedStopWords.get(stopWord)); diff --git a/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java index 94e4767c094..5da6921d482 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new BrazilianStemFilter(tokenStream, exclusions); + return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java index 2acfc857c7d..085a362b062 100644 --- a/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version); analyzer = new CJKAnalyzer(version, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java index b66662b231f..cfdb9146718 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new DutchStemFilter(tokenStream, exclusions); + return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java index 1bed5963046..9f472ec8f1c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java @@ -20,7 +20,8 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.fr.ElisionFilter; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.ElisionFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * */ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory { - private final Set articles; + private final CharArraySet articles; @Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { @@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory { @Override public TokenStream create(TokenStream tokenStream) { - if (articles == null) { - return new ElisionFilter(version, tokenStream); - } else { - return new ElisionFilter(version, tokenStream, articles); - } + return new ElisionFilter(tokenStream, articles); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java index 82996d0f666..e2db40e6345 100644 --- a/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider analyzers; @@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return getAnalyzer(fieldName).tokenStream(fieldName, reader); + protected Analyzer getWrappedAnalyzer(String fieldName) { + return getAnalyzer(fieldName); } @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader); - } - - @Override - public int getPositionIncrementGap(String fieldName) { - return getAnalyzer(fieldName).getPositionIncrementGap(fieldName); - } - - @Override - public int getOffsetGap(Fieldable field) { - return getAnalyzer(field.name()).getOffsetGap(field); + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + return components; } private Analyzer getAnalyzer(String name) { diff --git a/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java index d2d8029e969..098dfc6e884 100644 --- a/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new FrenchStemFilter(tokenStream, exclusions); + return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java index c1c8ab46afa..3c2b463d74d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new GermanStemFilter(tokenStream, exclusions); + return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java index 8185947f986..2df095e6a14 100644 --- a/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/); if (sPattern == null) { diff --git a/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java index 7e96ce349af..9a59c651f79 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java index 372d034f7eb..2e64c235c0c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private static final ImmutableMap> defaultLanguageStopwords = MapBuilder.>newMapBuilder() + private static final ImmutableMap defaultLanguageStopwords = MapBuilder.newMapBuilder() .put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET) .put("Dutch", DutchAnalyzer.getDefaultStopSet()) .put("German", GermanAnalyzer.getDefaultStopSet()) @@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.>of(); - Set stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version); + CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET; + CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version); analyzer = new SnowballAnalyzer(version, language, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java index 2f6c19ee4d5..dce0e329dfa 100644 --- a/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); standardAnalyzer = new StandardAnalyzer(version, stopWords); standardAnalyzer.setMaxTokenLength(maxTokenLength); diff --git a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java index 2b03fc82999..71ecf6d23cb 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java @@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase { tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override - protected boolean reset(final Reader reader) throws IOException { + protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); - return super.reset(reader); + super.setReader(reader); } }; } diff --git a/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java index 70ba9e142e1..d21bf73f21e 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; +import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.util.Version; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -38,7 +39,7 @@ import java.util.Map; @AnalysisSettingsRequired public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory { - private final Map dictionary; + private final CharArrayMap dictionary; @Inject public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { @@ -48,7 +49,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor if (rules == null) { throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured"); } - dictionary = new HashMap(); + dictionary = new CharArrayMap(version, rules.size(), false); parseRules(rules, dictionary, "=>"); } @@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary); } - static void parseRules(List rules, Map rulesMap, String mappingSep) { + static void parseRules(List rules, CharArrayMap rulesMap, String mappingSep) { for (String rule : rules) { String key, override; List mapping = Strings.splitSmart(rule, mappingSep, false); diff --git a/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java index 7baeb272c9e..668f04c43ce 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.bg.BulgarianStemFilter; @@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter; import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.KStemFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.es.SpanishLightStemFilter; import org.apache.lucene.analysis.fi.FinnishLightStemFilter; import org.apache.lucene.analysis.fr.FrenchLightStemFilter; diff --git a/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java index 0780301768d..7939c81ba2c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java @@ -19,7 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * */ @@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); this.stopAnalyzer = new StopAnalyzer(version, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java index 15c361568f6..8c8e8929cc8 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -37,7 +38,7 @@ import java.util.Set; */ public class StopTokenFilterFactory extends AbstractTokenFilterFactory { - private final Set stopWords; + private final CharArraySet stopWords; private final boolean ignoreCase; @@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory { @Inject public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); this.ignoreCase = settings.getAsBoolean("ignore_case", false); - this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29)); + this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase); + // LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined + this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29"))); } @Override public TokenStream create(TokenStream tokenStream) { - StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase); + StopFilter filter = new StopFilter(version, tokenStream, stopWords); filter.setEnablePositionIncrements(enablePositionIncrements); return filter; } diff --git a/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java index 5bdb1462379..8e2b5b741dd 100644 --- a/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider wordList; + protected final CharArraySet wordList; @Inject public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {