lucene 4: use CharArraySet for stem exclusions, stop words and articles and fix analyzer namespaces

This commit is contained in: (branch list not captured in this extract)
Author: Igor Motov, 2012-10-26 00:10:19 -04:00; committed by: Shay Banon
parent 1cc5ee7ad9
commit b128b7a750
59 changed files with 152 additions and 185 deletions

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -20,10 +20,8 @@
package org.elasticsearch.index.analysis;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
@ -78,18 +77,20 @@ public class Analysis {
return value != null && "_none_".equals(value);
}
public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) {
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) {
String value = settings.get("stem_exclusion");
if (value != null) {
if ("_none_".equals(value)) {
return ImmutableSet.of();
return CharArraySet.EMPTY_SET;
} else {
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false);
}
}
String[] stopWords = settings.getAsArray("stem_exclusion", null);
if (stopWords != null) {
return ImmutableSet.copyOf(Iterators.forArray(stopWords));
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, ImmutableList.of(stopWords), false);
} else {
return defaultStemExclusion;
}
@ -125,7 +126,7 @@ public class Analysis {
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
.immutableMap();
public static Set<?> parseArticles(Environment env, Settings settings, Version version) {
public static CharArraySet parseArticles(Environment env, Settings settings, Version version) {
String value = settings.get("articles");
if (value != null) {
if ("_none_".equals(value)) {
@ -146,18 +147,22 @@ public class Analysis {
return null;
}
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords, Version version) {
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) {
return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false));
}
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) {
String value = settings.get("stopwords");
if (value != null) {
if ("_none_".equals(value)) {
return CharArraySet.EMPTY_SET;
} else {
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false));
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case);
}
}
String[] stopWords = settings.getAsArray("stopwords", null);
if (stopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false));
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case);
for (String stopWord : stopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));
@ -169,7 +174,7 @@ public class Analysis {
}
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
if (pathLoadedStopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false));
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case);
for (String stopWord : pathLoadedStopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
super(index, indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(version,
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arme
super(index, indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(version,
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<Basque
super(index, indexSettings, name, settings);
analyzer = new BasqueAnalyzer(version,
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
super(index, indexSettings, name, settings);
analyzer = new BrazilianAnalyzer(version,
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -36,21 +38,16 @@ import java.util.Set;
*/
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions;
private final CharArraySet exclusions;
@Inject
public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new BrazilianStemFilter(tokenStream, exclusions);
return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bul
super(index, indexSettings, name, settings);
analyzer = new BulgarianAnalyzer(version,
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<Catal
super(index, indexSettings, name, settings);
analyzer = new CatalanAnalyzer(version,
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
@ -39,7 +38,7 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
@Inject
public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
analyzer = new CJKAnalyzer(version, stopWords);
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
super(index, indexSettings, name, settings);
analyzer = new CzechAnalyzer(version,
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Danish
super(index, indexSettings, name, settings);
analyzer = new DanishAnalyzer(version,
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
super(index, indexSettings, name, settings);
analyzer = new DutchAnalyzer(version,
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions;
private final CharArraySet exclusions;
@Inject
public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new DutchStemFilter(tokenStream, exclusions);
return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

View File

@ -20,7 +20,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> articles;
private final CharArraySet articles;
@Inject
public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
if (articles == null) {
return new ElisionFilter(version, tokenStream);
} else {
return new ElisionFilter(version, tokenStream, articles);
}
return new ElisionFilter(tokenStream, articles);
}
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Engli
super(index, indexSettings, name, settings);
analyzer = new EnglishAnalyzer(version,
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -21,17 +21,14 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.AnalyzerWrapper;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
/**
*
*/
public final class FieldNameAnalyzer extends Analyzer {
public final class FieldNameAnalyzer extends AnalyzerWrapper {
private final ImmutableMap<String, Analyzer> analyzers;
@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer {
}
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
return getAnalyzer(fieldName).tokenStream(fieldName, reader);
protected Analyzer getWrappedAnalyzer(String fieldName) {
return getAnalyzer(fieldName);
}
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader);
}
@Override
public int getPositionIncrementGap(String fieldName) {
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
@Override
public int getOffsetGap(Fieldable field) {
return getAnalyzer(field.name()).getOffsetGap(field);
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}
private Analyzer getAnalyzer(String name) {

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Finni
super(index, indexSettings, name, settings);
analyzer = new FinnishAnalyzer(version,
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
super(index, indexSettings, name, settings);
analyzer = new FrenchAnalyzer(version,
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -23,34 +23,29 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions;
private final CharArraySet exclusions;
@Inject
public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new FrenchStemFilter(tokenStream, exclusions);
return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Gali
super(index, indexSettings, name, settings);
analyzer = new GalicianAnalyzer(version,
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
super(index, indexSettings, name, settings);
analyzer = new GermanAnalyzer(version,
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> exclusions;
private final CharArraySet exclusions;
@Inject
public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new GermanStemFilter(tokenStream, exclusions);
return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAn
super(index, indexSettings, name, settings);
analyzer = new HindiAnalyzer(version,
Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Hun
super(index, indexSettings, name, settings);
analyzer = new HungarianAnalyzer(version,
Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<In
super(index, indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(version,
Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Itali
super(index, indexSettings, name, settings);
analyzer = new ItalianAnalyzer(version,
Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,9 +19,9 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Latvi
super(index, indexSettings, name, settings);
analyzer = new LatvianAnalyzer(version,
Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LengthFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.ElasticSearchIllegalArgumentException;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Nor
super(index, indexSettings, name, settings);
analyzer = new NorwegianAnalyzer(version,
Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -30,7 +31,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
import java.util.regex.Pattern;
/**
@ -46,7 +46,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
boolean lowercase = settings.getAsBoolean("lowercase", true);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
if (sPattern == null) {

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Po
super(index, indexSettings, name, settings);
analyzer = new PortugueseAnalyzer(version,
Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Roma
super(index, indexSettings, name, settings);
analyzer = new RomanianAnalyzer(version,
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
super(index, indexSettings, name, settings);
analyzer = new RussianAnalyzer(version,
Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -39,6 +39,6 @@ public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
return new RussianStemFilter(tokenStream);
return new SnowballFilter(tokenStream, "Russian");
}
}

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -21,11 +21,12 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -51,7 +52,7 @@ import java.util.Set;
*/
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder()
private static final ImmutableMap<String, CharArraySet> defaultLanguageStopwords = MapBuilder.<String, CharArraySet>newMapBuilder()
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
.put("German", GermanAnalyzer.getDefaultStopSet())
@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
super(index, indexSettings, name, settings);
String language = settings.get("language", settings.get("name", "English"));
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
Set<?> stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
analyzer = new SnowballAnalyzer(version, language, stopWords);
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Spani
super(index, indexSettings, name, settings);
analyzer = new SpanishAnalyzer(version,
Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -28,8 +29,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
@ -40,7 +39,7 @@ public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stan
@Inject
public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
standardAnalyzer = new StandardAnalyzer(version, stopWords);
standardAnalyzer.setMaxTokenLength(maxTokenLength);

View File

@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected boolean reset(final Reader reader) throws IOException {
protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
return super.reset(reader);
super.setReader(reader);
}
};
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
@ -38,7 +39,7 @@ import java.util.Map;
@AnalysisSettingsRequired
public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {
private final Map<String, String> dictionary;
private final CharArrayMap<String> dictionary;
@Inject
public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@ -48,7 +49,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
if (rules == null) {
throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
}
dictionary = new HashMap<String, String>();
dictionary = new CharArrayMap<String>(version, rules.size(), false);
parseRules(rules, dictionary, "=>");
}
@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary);
}
static void parseRules(List<String> rules, Map<String, String> rulesMap, String mappingSep) {
static void parseRules(List<String> rules, CharArrayMap<String> rulesMap, String mappingSep) {
for (String rule : rules) {
String key, override;
List<String> mapping = Strings.splitSmart(rule, mappingSep, false);

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;

View File

@ -19,7 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
*
*/
@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnal
@Inject
public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.stopAnalyzer = new StopAnalyzer(version, stopWords);
}

View File

@ -19,9 +19,10 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -37,7 +38,7 @@ import java.util.Set;
*/
public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
private final Set<?> stopWords;
private final CharArraySet stopWords;
private final boolean ignoreCase;
@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
@Inject
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29));
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase);
// LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29")));
}
@Override
public TokenStream create(TokenStream tokenStream) {
StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase);
StopFilter filter = new StopFilter(version, tokenStream, stopWords);
filter.setEnablePositionIncrements(enablePositionIncrements);
return filter;
}

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Swedi
super(index, indexSettings, name, settings);
analyzer = new SwedishAnalyzer(version,
Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -20,6 +20,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
@ -78,7 +80,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
}
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader);

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -40,7 +40,7 @@ public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Turki
super(index, indexSettings, name, settings);
analyzer = new TurkishAnalyzer(version,
Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}
@Override

View File

@ -45,7 +45,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
@Override
public Tokenizer create(Reader reader) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(version, reader);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene;

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis.compound;
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -30,8 +31,6 @@ import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* Contains the common configuration settings between subclasses of this class.
*/
@ -41,7 +40,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
protected final int minSubwordSize;
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
protected final Set<?> wordList;
protected final CharArraySet wordList;
@Inject
public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {