lucene 4: use CharArraySet for stem exclusions, stop words and articles and fix analyzer namespaces

This commit is contained in:
Igor Motov 2012-10-26 00:10:19 -04:00 committed by Shay Banon
parent 1cc5ee7ad9
commit b128b7a750
59 changed files with 152 additions and 185 deletions

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -20,10 +20,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import com.google.common.base.Charsets; import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer;
@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
@ -78,18 +77,20 @@ public class Analysis {
return value != null && "_none_".equals(value); return value != null && "_none_".equals(value);
} }
public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) { public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) {
String value = settings.get("stem_exclusion"); String value = settings.get("stem_exclusion");
if (value != null) { if (value != null) {
if ("_none_".equals(value)) { if ("_none_".equals(value)) {
return ImmutableSet.of(); return CharArraySet.EMPTY_SET;
} else { } else {
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value)); // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false);
} }
} }
String[] stopWords = settings.getAsArray("stem_exclusion", null); String[] stopWords = settings.getAsArray("stem_exclusion", null);
if (stopWords != null) { if (stopWords != null) {
return ImmutableSet.copyOf(Iterators.forArray(stopWords)); // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, ImmutableList.of(stopWords), false);
} else { } else {
return defaultStemExclusion; return defaultStemExclusion;
} }
@ -125,7 +126,7 @@ public class Analysis {
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet()) .put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
.immutableMap(); .immutableMap();
public static Set<?> parseArticles(Environment env, Settings settings, Version version) { public static CharArraySet parseArticles(Environment env, Settings settings, Version version) {
String value = settings.get("articles"); String value = settings.get("articles");
if (value != null) { if (value != null) {
if ("_none_".equals(value)) { if ("_none_".equals(value)) {
@ -146,18 +147,22 @@ public class Analysis {
return null; return null;
} }
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords, Version version) { public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) {
return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false));
}
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) {
String value = settings.get("stopwords"); String value = settings.get("stopwords");
if (value != null) { if (value != null) {
if ("_none_".equals(value)) { if ("_none_".equals(value)) {
return CharArraySet.EMPTY_SET; return CharArraySet.EMPTY_SET;
} else { } else {
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false)); return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case);
} }
} }
String[] stopWords = settings.getAsArray("stopwords", null); String[] stopWords = settings.getAsArray("stopwords", null);
if (stopWords != null) { if (stopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false)); CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case);
for (String stopWord : stopWords) { for (String stopWord : stopWords) {
if (namedStopWords.containsKey(stopWord)) { if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord)); setStopWords.addAll(namedStopWords.get(stopWord));
@ -169,7 +174,7 @@ public class Analysis {
} }
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords"); List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
if (pathLoadedStopWords != null) { if (pathLoadedStopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false)); CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case);
for (String stopWord : pathLoadedStopWords) { for (String stopWord : pathLoadedStopWords) {
if (namedStopWords.containsKey(stopWord)) { if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord)); setStopWords.addAll(namedStopWords.get(stopWord));

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(version, arabicAnalyzer = new ArabicAnalyzer(version,
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arme
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(version, analyzer = new ArmenianAnalyzer(version,
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<Basque
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new BasqueAnalyzer(version, analyzer = new BasqueAnalyzer(version,
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new BrazilianAnalyzer(version, analyzer = new BrazilianAnalyzer(version,
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter; import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -36,21 +38,16 @@ import java.util.Set;
*/ */
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory { public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions; private final CharArraySet exclusions;
@Inject @Inject
public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion"); this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new BrazilianStemFilter(tokenStream, exclusions); return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
} }
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bul
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new BulgarianAnalyzer(version, analyzer = new BulgarianAnalyzer(version,
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<Catal
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new CatalanAnalyzer(version, analyzer = new CatalanAnalyzer(version,
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
@ -39,7 +38,7 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
@Inject @Inject
public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version); CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
analyzer = new CJKAnalyzer(version, stopWords); analyzer = new CJKAnalyzer(version, stopWords);
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new CzechAnalyzer(version, analyzer = new CzechAnalyzer(version,
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.da.DanishAnalyzer; import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Danish
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new DanishAnalyzer(version, analyzer = new DanishAnalyzer(version,
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new DutchAnalyzer(version, analyzer = new DutchAnalyzer(version,
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.nl.DutchStemFilter; import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory { public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions; private final CharArraySet exclusions;
@Inject @Inject
public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion"); this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new DutchStemFilter(tokenStream, exclusions); return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
} }
} }

View File

@ -20,7 +20,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.ElisionFilter; import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory { public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> articles; private final CharArraySet articles;
@Inject @Inject
public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
if (articles == null) { return new ElisionFilter(tokenStream, articles);
return new ElisionFilter(version, tokenStream);
} else {
return new ElisionFilter(version, tokenStream, articles);
}
} }
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Engli
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new EnglishAnalyzer(version, analyzer = new EnglishAnalyzer(version,
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -21,17 +21,14 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.document.Fieldable;
import java.io.IOException;
import java.io.Reader;
import java.util.Map; import java.util.Map;
/** /**
* *
*/ */
public final class FieldNameAnalyzer extends Analyzer { public final class FieldNameAnalyzer extends AnalyzerWrapper {
private final ImmutableMap<String, Analyzer> analyzers; private final ImmutableMap<String, Analyzer> analyzers;
@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer {
} }
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { protected Analyzer getWrappedAnalyzer(String fieldName) {
return getAnalyzer(fieldName).tokenStream(fieldName, reader); return getAnalyzer(fieldName);
} }
@Override @Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader); return components;
}
@Override
public int getPositionIncrementGap(String fieldName) {
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
@Override
public int getOffsetGap(Fieldable field) {
return getAnalyzer(field.name()).getOffsetGap(field);
} }
private Analyzer getAnalyzer(String name) { private Analyzer getAnalyzer(String name) {

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Finni
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new FinnishAnalyzer(version, analyzer = new FinnishAnalyzer(version,
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new FrenchAnalyzer(version, analyzer = new FrenchAnalyzer(version,
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -23,34 +23,29 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchStemFilter; import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory { public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions; private final CharArraySet exclusions;
@Inject @Inject
public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion"); this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new FrenchStemFilter(tokenStream, exclusions); return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
} }
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Gali
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new GalicianAnalyzer(version, analyzer = new GalicianAnalyzer(version,
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new GermanAnalyzer(version, analyzer = new GermanAnalyzer(version,
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory { public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions; private final CharArraySet exclusions;
@Inject @Inject
public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion"); this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new GermanStemFilter(tokenStream, exclusions); return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
} }
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hi.HindiAnalyzer; import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAn
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new HindiAnalyzer(version, analyzer = new HindiAnalyzer(version,
Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hu.HungarianAnalyzer; import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Hun
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new HungarianAnalyzer(version, analyzer = new HungarianAnalyzer(version,
Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.id.IndonesianAnalyzer; import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<In
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(version, analyzer = new IndonesianAnalyzer(version,
Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.it.ItalianAnalyzer; import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Itali
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new ItalianAnalyzer(version, analyzer = new ItalianAnalyzer(version,
Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,9 +19,9 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.lv.LatvianAnalyzer; import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Latvi
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new LatvianAnalyzer(version, analyzer = new LatvianAnalyzer(version,
Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LengthFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.el.GreekLowerCaseFilter; import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.no.NorwegianAnalyzer; import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Nor
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new NorwegianAnalyzer(version, analyzer = new NorwegianAnalyzer(version,
Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer; import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -30,7 +31,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
@ -46,7 +46,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
boolean lowercase = settings.getAsBoolean("lowercase", true); boolean lowercase = settings.getAsBoolean("lowercase", true);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/); String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
if (sPattern == null) { if (sPattern == null) {

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Po
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new PortugueseAnalyzer(version, analyzer = new PortugueseAnalyzer(version,
Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Roma
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new RomanianAnalyzer(version, analyzer = new RomanianAnalyzer(version,
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new RussianAnalyzer(version, analyzer = new RussianAnalyzer(version,
Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianStemFilter; import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -39,6 +39,6 @@ public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new RussianStemFilter(tokenStream); return new SnowballFilter(tokenStream, "Russian");
} }
} }

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -21,11 +21,12 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer; import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -51,7 +52,7 @@ import java.util.Set;
*/ */
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> { public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder() private static final ImmutableMap<String, CharArraySet> defaultLanguageStopwords = MapBuilder.<String, CharArraySet>newMapBuilder()
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET) .put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
.put("Dutch", DutchAnalyzer.getDefaultStopSet()) .put("Dutch", DutchAnalyzer.getDefaultStopSet())
.put("German", GermanAnalyzer.getDefaultStopSet()) .put("German", GermanAnalyzer.getDefaultStopSet())
@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
String language = settings.get("language", settings.get("name", "English")); String language = settings.get("language", settings.get("name", "English"));
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of(); CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET;
Set<?> stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version); CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
analyzer = new SnowballAnalyzer(version, language, stopWords); analyzer = new SnowballAnalyzer(version, language, stopWords);
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.es.SpanishAnalyzer; import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Spani
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new SpanishAnalyzer(version, analyzer = new SpanishAnalyzer(version,
Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -28,8 +29,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
@ -40,7 +39,7 @@ public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stan
@Inject @Inject
public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
standardAnalyzer = new StandardAnalyzer(version, stopWords); standardAnalyzer = new StandardAnalyzer(version, stopWords);
standardAnalyzer.setMaxTokenLength(maxTokenLength); standardAnalyzer.setMaxTokenLength(maxTokenLength);

View File

@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
tok = new StopFilter(matchVersion, tok, stopwords); tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) { return new TokenStreamComponents(src, tok) {
@Override @Override
protected boolean reset(final Reader reader) throws IOException { protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
return super.reset(reader); super.setReader(reader);
} }
}; };
} }

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
@ -38,7 +39,7 @@ import java.util.Map;
@AnalysisSettingsRequired @AnalysisSettingsRequired
public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory { public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {
private final Map<String, String> dictionary; private final CharArrayMap<String> dictionary;
@Inject @Inject
public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@ -48,7 +49,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
if (rules == null) { if (rules == null) {
throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured"); throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
} }
dictionary = new HashMap<String, String>(); dictionary = new CharArrayMap<String>(version, rules.size(), false);
parseRules(rules, dictionary, "=>"); parseRules(rules, dictionary, "=>");
} }
@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary); return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary);
} }
static void parseRules(List<String> rules, Map<String, String> rulesMap, String mappingSep) { static void parseRules(List<String> rules, CharArrayMap<String> rulesMap, String mappingSep) {
for (String rule : rules) { for (String rule : rules) {
String key, override; String key, override;
List<String> mapping = Strings.splitSmart(rule, mappingSep, false); List<String> mapping = Strings.splitSmart(rule, mappingSep, false);

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianStemFilter; import org.apache.lucene.analysis.bg.BulgarianStemFilter;
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.en.KStemFilter; import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.es.SpanishLightStemFilter; import org.apache.lucene.analysis.es.SpanishLightStemFilter;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter; import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter; import org.apache.lucene.analysis.fr.FrenchLightStemFilter;

View File

@ -19,7 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* *
*/ */
@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnal
@Inject @Inject
public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.stopAnalyzer = new StopAnalyzer(version, stopWords); this.stopAnalyzer = new StopAnalyzer(version, stopWords);
} }

View File

@ -19,9 +19,10 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -37,7 +38,7 @@ import java.util.Set;
*/ */
public class StopTokenFilterFactory extends AbstractTokenFilterFactory { public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> stopWords; private final CharArraySet stopWords;
private final boolean ignoreCase; private final boolean ignoreCase;
@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
@Inject @Inject
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29)); this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase);
// LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29")));
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase); StopFilter filter = new StopFilter(version, tokenStream, stopWords);
filter.setEnablePositionIncrements(enablePositionIncrements); filter.setEnablePositionIncrements(enablePositionIncrements);
return filter; return filter;
} }

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Swedi
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new SwedishAnalyzer(version, analyzer = new SwedishAnalyzer(version,
Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -20,6 +20,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser; import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymFilter; import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap; import org.apache.lucene.analysis.synonym.SynonymMap;
@ -78,7 +80,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
} }
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings); final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);
Analyzer analyzer = new ReusableAnalyzerBase() { Analyzer analyzer = new Analyzer() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader); Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader);

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Turki
super(index, indexSettings, name, settings); super(index, indexSettings, name, settings);
analyzer = new TurkishAnalyzer(version, analyzer = new TurkishAnalyzer(version,
Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version), Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
} }
@Override @Override

View File

@ -45,7 +45,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
@Override @Override
public Tokenizer create(Reader reader) { public Tokenizer create(Reader reader) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader); UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(version, reader);
tokenizer.setMaxTokenLength(maxTokenLength); tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer; return tokenizer;
} }

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator; import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis.compound; package org.elasticsearch.index.analysis.compound;
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase; import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -30,8 +31,6 @@ import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis; import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/** /**
* Contains the common configuration settings between subclasses of this class. * Contains the common configuration settings between subclasses of this class.
*/ */
@ -41,7 +40,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
protected final int minSubwordSize; protected final int minSubwordSize;
protected final int maxSubwordSize; protected final int maxSubwordSize;
protected final boolean onlyLongestMatch; protected final boolean onlyLongestMatch;
protected final Set<?> wordList; protected final CharArraySet wordList;
@Inject @Inject
public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {