lucene 4: use CharArraySet for stem exclusions, stop words and articles and fix analyzer namespaces
parent 1cc5ee7ad9
commit b128b7a750
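The change follows one pattern throughout: settings that used to be parsed into an immutable Set<?> are now parsed into Lucene 4's org.apache.lucene.analysis.util.CharArraySet, with CharArraySet.EMPTY_SET as the empty default, and the stem token filters receive their exclusion set through a KeywordMarkerFilter wrapper instead of a constructor argument. Below is a minimal sketch of that parsing pattern, assuming Lucene 4.0 on the classpath; the class and method names are illustrative only, since the real logic lives in Analysis.parseStemExclusion/parseStopWords and reads Elasticsearch Settings.

import java.util.Arrays;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Illustrative sketch (not part of the commit): the CharArraySet-based parsing
// that replaces the old ImmutableSet-based parsing.
public class CharArraySetSketch {

    // "value" stands in for the comma-delimited "stem_exclusion"/"stopwords" setting.
    static CharArraySet parse(String value, CharArraySet defaultSet, Version version) {
        if (value == null) {
            return defaultSet;              // setting absent: keep the default
        }
        if ("_none_".equals(value)) {
            return CharArraySet.EMPTY_SET;  // was ImmutableSet.of()
        }
        // last argument is ignoreCase; the commit keeps it false for stem exclusions
        return new CharArraySet(version, Arrays.asList(value.split(",")), false);
    }

    public static void main(String[] args) {
        CharArraySet set = parse("laptop,laptops", CharArraySet.EMPTY_SET, Version.LUCENE_40);
        System.out.println(set.contains("laptops")); // true
    }
}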
@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -20,10 +20,8 @@
package org.elasticsearch.index.analysis;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
@@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
@@ -78,18 +77,20 @@ public class Analysis {
return value != null && "_none_".equals(value);
}

public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) {
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) {
String value = settings.get("stem_exclusion");
if (value != null) {
if ("_none_".equals(value)) {
return ImmutableSet.of();
return CharArraySet.EMPTY_SET;
} else {
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false);
}
}
String[] stopWords = settings.getAsArray("stem_exclusion", null);
if (stopWords != null) {
return ImmutableSet.copyOf(Iterators.forArray(stopWords));
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
return new CharArraySet(version, ImmutableList.of(stopWords), false);
} else {
return defaultStemExclusion;
}
@@ -125,7 +126,7 @@ public class Analysis {
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
.immutableMap();

public static Set<?> parseArticles(Environment env, Settings settings, Version version) {
public static CharArraySet parseArticles(Environment env, Settings settings, Version version) {
String value = settings.get("articles");
if (value != null) {
if ("_none_".equals(value)) {
@@ -146,18 +147,22 @@ public class Analysis {
return null;
}

public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords, Version version) {
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) {
return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false));
}

public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) {
String value = settings.get("stopwords");
if (value != null) {
if ("_none_".equals(value)) {
return CharArraySet.EMPTY_SET;
} else {
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false));
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case);
}
}
String[] stopWords = settings.getAsArray("stopwords", null);
if (stopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false));
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case);
for (String stopWord : stopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));
@@ -169,7 +174,7 @@ public class Analysis {
}
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
if (pathLoadedStopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false));
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case);
for (String stopWord : pathLoadedStopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
super(index, indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(version,
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arme
super(index, indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(version,
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<Basque
super(index, indexSettings, name, settings);
analyzer = new BasqueAnalyzer(version,
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
super(index, indexSettings, name, settings);
analyzer = new BrazilianAnalyzer(version,
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -36,21 +38,16 @@ import java.util.Set;
*/
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> exclusions;
private final CharArraySet exclusions;

@Inject
public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new BrazilianStemFilter(tokenStream, exclusions);
return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bul
super(index, indexSettings, name, settings);
analyzer = new BulgarianAnalyzer(version,
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<Catal
super(index, indexSettings, name, settings);
analyzer = new CatalanAnalyzer(version,
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
@@ -39,7 +38,7 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
@Inject
public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);

analyzer = new CJKAnalyzer(version, stopWords);
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
super(index, indexSettings, name, settings);
analyzer = new CzechAnalyzer(version,
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Danish
super(index, indexSettings, name, settings);
analyzer = new DanishAnalyzer(version,
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
super(index, indexSettings, name, settings);
analyzer = new DutchAnalyzer(version,
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> exclusions;
private final CharArraySet exclusions;

@Inject
public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new DutchStemFilter(tokenStream, exclusions);
return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

@@ -20,7 +20,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> articles;
private final CharArraySet articles;

@Inject
public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
if (articles == null) {
return new ElisionFilter(version, tokenStream);
} else {
return new ElisionFilter(version, tokenStream, articles);
}
return new ElisionFilter(tokenStream, articles);
}
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Engli
super(index, indexSettings, name, settings);
analyzer = new EnglishAnalyzer(version,
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -21,17 +21,14 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.AnalyzerWrapper;

import java.io.IOException;
import java.io.Reader;
import java.util.Map;

/**
*
*/
public final class FieldNameAnalyzer extends Analyzer {
public final class FieldNameAnalyzer extends AnalyzerWrapper {

private final ImmutableMap<String, Analyzer> analyzers;
@@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer {
}

@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
return getAnalyzer(fieldName).tokenStream(fieldName, reader);
protected Analyzer getWrappedAnalyzer(String fieldName) {
return getAnalyzer(fieldName);
}

@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader);
}

@Override
public int getPositionIncrementGap(String fieldName) {
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}

@Override
public int getOffsetGap(Fieldable field) {
return getAnalyzer(field.name()).getOffsetGap(field);
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}

private Analyzer getAnalyzer(String name) {

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Finni
super(index, indexSettings, name, settings);
analyzer = new FinnishAnalyzer(version,
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
super(index, indexSettings, name, settings);
analyzer = new FrenchAnalyzer(version,
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -23,34 +23,29 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> exclusions;
private final CharArraySet exclusions;

@Inject
public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new FrenchStemFilter(tokenStream, exclusions);
return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Gali
super(index, indexSettings, name, settings);
analyzer = new GalicianAnalyzer(version,
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
super(index, indexSettings, name, settings);
analyzer = new GermanAnalyzer(version,
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,38 +19,31 @@
package org.elasticsearch.index.analysis;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> exclusions;
private final CharArraySet exclusions;

@Inject
public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
String[] stemExclusion = settings.getAsArray("stem_exclusion");
if (stemExclusion.length > 0) {
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
} else {
this.exclusions = ImmutableSet.of();
}
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new GermanStemFilter(tokenStream, exclusions);
return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
}
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAn
super(index, indexSettings, name, settings);
analyzer = new HindiAnalyzer(version,
Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Hun
super(index, indexSettings, name, settings);
analyzer = new HungarianAnalyzer(version,
Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<In
super(index, indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(version,
Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Itali
super(index, indexSettings, name, settings);
analyzer = new ItalianAnalyzer(version,
Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,9 +19,9 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Latvi
super(index, indexSettings, name, settings);
analyzer = new LatvianAnalyzer(version,
Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.LengthFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.ElasticSearchIllegalArgumentException;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Nor
super(index, indexSettings, name, settings);
analyzer = new NorwegianAnalyzer(version,
Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -30,7 +31,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;
import java.util.regex.Pattern;

/**
@@ -46,7 +46,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
boolean lowercase = settings.getAsBoolean("lowercase", true);

Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);

String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
if (sPattern == null) {

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Po
super(index, indexSettings, name, settings);
analyzer = new PortugueseAnalyzer(version,
Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Roma
super(index, indexSettings, name, settings);
analyzer = new RomanianAnalyzer(version,
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
super(index, indexSettings, name, settings);
analyzer = new RussianAnalyzer(version,
Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -39,6 +39,6 @@ public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
return new RussianStemFilter(tokenStream);
return new SnowballFilter(tokenStream, "Russian");
}
}

@@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -21,11 +21,12 @@ package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -51,7 +52,7 @@ import java.util.Set;
*/
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {

private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder()
private static final ImmutableMap<String, CharArraySet> defaultLanguageStopwords = MapBuilder.<String, CharArraySet>newMapBuilder()
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
.put("German", GermanAnalyzer.getDefaultStopSet())
@@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
super(index, indexSettings, name, settings);

String language = settings.get("language", settings.get("name", "English"));
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
Set<?> stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);

analyzer = new SnowballAnalyzer(version, language, stopWords);
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Spani
super(index, indexSettings, name, settings);
analyzer = new SpanishAnalyzer(version,
Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -19,8 +19,9 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -28,8 +29,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
@@ -40,7 +39,7 @@ public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stan
@Inject
public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
standardAnalyzer = new StandardAnalyzer(version, stopWords);
standardAnalyzer.setMaxTokenLength(maxTokenLength);

@@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected boolean reset(final Reader reader) throws IOException {
protected void setReader(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
return super.reset(reader);
super.setReader(reader);
}
};
}

@@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
@@ -38,7 +39,7 @@ import java.util.Map;
@AnalysisSettingsRequired
public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {

private final Map<String, String> dictionary;
private final CharArrayMap<String> dictionary;

@Inject
public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
@@ -48,7 +49,7 @@ public class StemmerOverrideTokenFilterFactor
if (rules == null) {
throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
}
dictionary = new HashMap<String, String>();
dictionary = new CharArrayMap<String>(version, rules.size(), false);
parseRules(rules, dictionary, "=>");
}

@@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactor
return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary);
}

static void parseRules(List<String> rules, Map<String, String> rulesMap, String mappingSep) {
static void parseRules(List<String> rules, CharArrayMap<String> rulesMap, String mappingSep) {
for (String rule : rules) {
String key, override;
List<String> mapping = Strings.splitSmart(rule, mappingSep, false);

@@ -19,7 +19,6 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
@@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;

@@ -19,7 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
*
*/
@@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnal
@Inject
public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.stopAnalyzer = new StopAnalyzer(version, stopWords);
}

@@ -19,9 +19,10 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -37,7 +38,7 @@ import java.util.Set;
*/
public class StopTokenFilterFactory extends AbstractTokenFilterFactory {

private final Set<?> stopWords;
private final CharArraySet stopWords;

private final boolean ignoreCase;
@@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
@Inject
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29));
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase);
// LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29")));
}

@Override
public TokenStream create(TokenStream tokenStream) {
StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase);
StopFilter filter = new StopFilter(version, tokenStream, stopWords);
filter.setEnablePositionIncrements(enablePositionIncrements);
return filter;
}

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Swedi
super(index, indexSettings, name, settings);
analyzer = new SwedishAnalyzer(version,
Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -20,6 +20,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
@@ -78,7 +80,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
}
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);

Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader);

@@ -19,8 +19,8 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +40,7 @@ public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Turki
super(index, indexSettings, name, settings);
analyzer = new TurkishAnalyzer(version,
Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

@Override

@@ -45,7 +45,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
@Override
public Tokenizer create(Reader reader) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(version, reader);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}

@@ -19,7 +19,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;

@@ -19,10 +19,10 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene;

@@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis.compound;

import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -30,8 +31,6 @@ import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.settings.IndexSettings;

import java.util.Set;

/**
* Contains the common configuration settings between subclasses of this class.
*/
@@ -41,7 +40,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
protected final int minSubwordSize;
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
protected final Set<?> wordList;
protected final CharArraySet wordList;

@Inject
public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {