lucene 4: use CharArraySet for stem exclusions, stop words and articles and fix analyzer namespaces
This commit is contained in:
parent
1cc5ee7ad9
commit
b128b7a750
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ASCIIFoldingFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -20,10 +20,8 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import com.google.common.base.Charsets;
|
import com.google.common.base.Charsets;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
import com.google.common.collect.ImmutableSet;
|
|
||||||
import com.google.common.collect.Iterators;
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||||
|
@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
|
@ -78,18 +77,20 @@ public class Analysis {
|
||||||
return value != null && "_none_".equals(value);
|
return value != null && "_none_".equals(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Set<?> parseStemExclusion(Settings settings, Set<?> defaultStemExclusion) {
|
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) {
|
||||||
String value = settings.get("stem_exclusion");
|
String value = settings.get("stem_exclusion");
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
if ("_none_".equals(value)) {
|
if ("_none_".equals(value)) {
|
||||||
return ImmutableSet.of();
|
return CharArraySet.EMPTY_SET;
|
||||||
} else {
|
} else {
|
||||||
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
|
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
|
||||||
|
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String[] stopWords = settings.getAsArray("stem_exclusion", null);
|
String[] stopWords = settings.getAsArray("stem_exclusion", null);
|
||||||
if (stopWords != null) {
|
if (stopWords != null) {
|
||||||
return ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
// LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
|
||||||
|
return new CharArraySet(version, ImmutableList.of(stopWords), false);
|
||||||
} else {
|
} else {
|
||||||
return defaultStemExclusion;
|
return defaultStemExclusion;
|
||||||
}
|
}
|
||||||
|
@ -125,7 +126,7 @@ public class Analysis {
|
||||||
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
|
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
|
||||||
.immutableMap();
|
.immutableMap();
|
||||||
|
|
||||||
public static Set<?> parseArticles(Environment env, Settings settings, Version version) {
|
public static CharArraySet parseArticles(Environment env, Settings settings, Version version) {
|
||||||
String value = settings.get("articles");
|
String value = settings.get("articles");
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
if ("_none_".equals(value)) {
|
if ("_none_".equals(value)) {
|
||||||
|
@ -146,18 +147,22 @@ public class Analysis {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords, Version version) {
|
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) {
|
||||||
|
return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) {
|
||||||
String value = settings.get("stopwords");
|
String value = settings.get("stopwords");
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
if ("_none_".equals(value)) {
|
if ("_none_".equals(value)) {
|
||||||
return CharArraySet.EMPTY_SET;
|
return CharArraySet.EMPTY_SET;
|
||||||
} else {
|
} else {
|
||||||
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false));
|
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String[] stopWords = settings.getAsArray("stopwords", null);
|
String[] stopWords = settings.getAsArray("stopwords", null);
|
||||||
if (stopWords != null) {
|
if (stopWords != null) {
|
||||||
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false));
|
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case);
|
||||||
for (String stopWord : stopWords) {
|
for (String stopWord : stopWords) {
|
||||||
if (namedStopWords.containsKey(stopWord)) {
|
if (namedStopWords.containsKey(stopWord)) {
|
||||||
setStopWords.addAll(namedStopWords.get(stopWord));
|
setStopWords.addAll(namedStopWords.get(stopWord));
|
||||||
|
@ -169,7 +174,7 @@ public class Analysis {
|
||||||
}
|
}
|
||||||
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
|
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
|
||||||
if (pathLoadedStopWords != null) {
|
if (pathLoadedStopWords != null) {
|
||||||
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false));
|
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case);
|
||||||
for (String stopWord : pathLoadedStopWords) {
|
for (String stopWord : pathLoadedStopWords) {
|
||||||
if (namedStopWords.containsKey(stopWord)) {
|
if (namedStopWords.containsKey(stopWord)) {
|
||||||
setStopWords.addAll(namedStopWords.get(stopWord));
|
setStopWords.addAll(namedStopWords.get(stopWord));
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
arabicAnalyzer = new ArabicAnalyzer(version,
|
arabicAnalyzer = new ArabicAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arme
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new ArmenianAnalyzer(version,
|
analyzer = new ArmenianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<Basque
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new BasqueAnalyzer(version,
|
analyzer = new BasqueAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new BrazilianAnalyzer(version,
|
analyzer = new BrazilianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableSet;
|
||||||
import com.google.common.collect.Iterators;
|
import com.google.common.collect.Iterators;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -36,21 +38,16 @@ import java.util.Set;
|
||||||
*/
|
*/
|
||||||
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> exclusions;
|
private final CharArraySet exclusions;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
|
||||||
if (stemExclusion.length > 0) {
|
|
||||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
|
||||||
} else {
|
|
||||||
this.exclusions = ImmutableSet.of();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new BrazilianStemFilter(tokenStream, exclusions);
|
return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bul
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new BulgarianAnalyzer(version,
|
analyzer = new BulgarianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<Catal
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new CatalanAnalyzer(version,
|
analyzer = new CatalanAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -39,7 +38,7 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
|
||||||
@Inject
|
@Inject
|
||||||
public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
Set<?> stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
|
||||||
|
|
||||||
analyzer = new CJKAnalyzer(version, stopWords);
|
analyzer = new CJKAnalyzer(version, stopWords);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new CzechAnalyzer(version,
|
analyzer = new CzechAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Danish
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new DanishAnalyzer(version,
|
analyzer = new DanishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new DutchAnalyzer(version,
|
analyzer = new DutchAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,38 +19,31 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
|
||||||
import com.google.common.collect.Iterators;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.nl.DutchStemFilter;
|
import org.apache.lucene.analysis.nl.DutchStemFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> exclusions;
|
private final CharArraySet exclusions;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
|
||||||
if (stemExclusion.length > 0) {
|
|
||||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
|
||||||
} else {
|
|
||||||
this.exclusions = ImmutableSet.of();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new DutchStemFilter(tokenStream, exclusions);
|
return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -20,7 +20,8 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.fr.ElisionFilter;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
|
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> articles;
|
private final CharArraySet articles;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
if (articles == null) {
|
return new ElisionFilter(tokenStream, articles);
|
||||||
return new ElisionFilter(version, tokenStream);
|
|
||||||
} else {
|
|
||||||
return new ElisionFilter(version, tokenStream, articles);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Engli
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new EnglishAnalyzer(version,
|
analyzer = new EnglishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,17 +21,14 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.AnalyzerWrapper;
|
||||||
import org.apache.lucene.document.Fieldable;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public final class FieldNameAnalyzer extends Analyzer {
|
public final class FieldNameAnalyzer extends AnalyzerWrapper {
|
||||||
|
|
||||||
private final ImmutableMap<String, Analyzer> analyzers;
|
private final ImmutableMap<String, Analyzer> analyzers;
|
||||||
|
|
||||||
|
@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String fieldName, Reader reader) {
|
protected Analyzer getWrappedAnalyzer(String fieldName) {
|
||||||
return getAnalyzer(fieldName).tokenStream(fieldName, reader);
|
return getAnalyzer(fieldName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
|
||||||
return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader);
|
return components;
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getPositionIncrementGap(String fieldName) {
|
|
||||||
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getOffsetGap(Fieldable field) {
|
|
||||||
return getAnalyzer(field.name()).getOffsetGap(field);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Analyzer getAnalyzer(String name) {
|
private Analyzer getAnalyzer(String name) {
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Finni
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new FinnishAnalyzer(version,
|
analyzer = new FinnishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new FrenchAnalyzer(version,
|
analyzer = new FrenchAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -23,34 +23,29 @@ import com.google.common.collect.ImmutableSet;
|
||||||
import com.google.common.collect.Iterators;
|
import com.google.common.collect.Iterators;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> exclusions;
|
private final CharArraySet exclusions;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
|
||||||
if (stemExclusion.length > 0) {
|
|
||||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
|
||||||
} else {
|
|
||||||
this.exclusions = ImmutableSet.of();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new FrenchStemFilter(tokenStream, exclusions);
|
return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Gali
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new GalicianAnalyzer(version,
|
analyzer = new GalicianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new GermanAnalyzer(version,
|
analyzer = new GermanAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,38 +19,31 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
|
||||||
import com.google.common.collect.Iterators;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.de.GermanStemFilter;
|
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> exclusions;
|
private final CharArraySet exclusions;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
|
||||||
if (stemExclusion.length > 0) {
|
|
||||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
|
||||||
} else {
|
|
||||||
this.exclusions = ImmutableSet.of();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new GermanStemFilter(tokenStream, exclusions);
|
return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
import org.apache.lucene.analysis.hi.HindiAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAn
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new HindiAnalyzer(version,
|
analyzer = new HindiAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class HungarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Hun
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new HungarianAnalyzer(version,
|
analyzer = new HungarianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class IndonesianAnalyzerProvider extends AbstractIndexAnalyzerProvider<In
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new IndonesianAnalyzer(version,
|
analyzer = new IndonesianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
import org.apache.lucene.analysis.it.ItalianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class ItalianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Itali
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new ItalianAnalyzer(version,
|
analyzer = new ItalianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordAnalyzer;
|
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,9 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordTokenizer;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
|
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class LatvianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Latvi
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new LatvianAnalyzer(version,
|
analyzer = new LatvianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.LengthFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.LetterTokenizer;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
|
import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class NorwegianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Nor
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new NorwegianAnalyzer(version,
|
analyzer = new NorwegianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -30,7 +31,6 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,7 +46,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
|
||||||
|
|
||||||
boolean lowercase = settings.getAsBoolean("lowercase", true);
|
boolean lowercase = settings.getAsBoolean("lowercase", true);
|
||||||
|
|
||||||
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
||||||
|
|
||||||
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
|
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
|
||||||
if (sPattern == null) {
|
if (sPattern == null) {
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.PorterStemFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Po
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new PortugueseAnalyzer(version,
|
analyzer = new PortugueseAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, PortugueseAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Roma
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new RomanianAnalyzer(version,
|
analyzer = new RomanianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new RussianAnalyzer(version,
|
analyzer = new RussianAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ru.RussianStemFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -39,6 +39,6 @@ public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new RussianStemFilter(tokenStream);
|
return new SnowballFilter(tokenStream, "Russian");
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
import org.apache.lucene.analysis.core.SimpleAnalyzer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -21,11 +21,12 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.collect.MapBuilder;
|
import org.elasticsearch.common.collect.MapBuilder;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -51,7 +52,7 @@ import java.util.Set;
|
||||||
*/
|
*/
|
||||||
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
|
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
|
||||||
|
|
||||||
private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder()
|
private static final ImmutableMap<String, CharArraySet> defaultLanguageStopwords = MapBuilder.<String, CharArraySet>newMapBuilder()
|
||||||
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
|
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
|
||||||
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
|
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
|
||||||
.put("German", GermanAnalyzer.getDefaultStopSet())
|
.put("German", GermanAnalyzer.getDefaultStopSet())
|
||||||
|
@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
|
|
||||||
String language = settings.get("language", settings.get("name", "English"));
|
String language = settings.get("language", settings.get("name", "English"));
|
||||||
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
|
CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET;
|
||||||
Set<?> stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version);
|
||||||
|
|
||||||
analyzer = new SnowballAnalyzer(version, language, stopWords);
|
analyzer = new SnowballAnalyzer(version, language, stopWords);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Spani
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new SpanishAnalyzer(version,
|
analyzer = new SpanishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -28,8 +29,6 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -40,7 +39,7 @@ public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stan
|
||||||
@Inject
|
@Inject
|
||||||
public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
||||||
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
standardAnalyzer = new StandardAnalyzer(version, stopWords);
|
standardAnalyzer = new StandardAnalyzer(version, stopWords);
|
||||||
standardAnalyzer.setMaxTokenLength(maxTokenLength);
|
standardAnalyzer.setMaxTokenLength(maxTokenLength);
|
||||||
|
|
|
@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
|
||||||
tok = new StopFilter(matchVersion, tok, stopwords);
|
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||||
return new TokenStreamComponents(src, tok) {
|
return new TokenStreamComponents(src, tok) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean reset(final Reader reader) throws IOException {
|
protected void setReader(final Reader reader) throws IOException {
|
||||||
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
return super.reset(reader);
|
super.setReader(reader);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
|
@ -38,7 +39,7 @@ import java.util.Map;
|
||||||
@AnalysisSettingsRequired
|
@AnalysisSettingsRequired
|
||||||
public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Map<String, String> dictionary;
|
private final CharArrayMap<String> dictionary;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
@ -48,7 +49,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
|
||||||
if (rules == null) {
|
if (rules == null) {
|
||||||
throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
|
throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
|
||||||
}
|
}
|
||||||
dictionary = new HashMap<String, String>();
|
dictionary = new CharArrayMap<String>(version, rules.size(), false);
|
||||||
parseRules(rules, dictionary, "=>");
|
parseRules(rules, dictionary, "=>");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor
|
||||||
return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary);
|
return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parseRules(List<String> rules, Map<String, String> rulesMap, String mappingSep) {
|
static void parseRules(List<String> rules, CharArrayMap<String> rulesMap, String mappingSep) {
|
||||||
for (String rule : rules) {
|
for (String rule : rules) {
|
||||||
String key, override;
|
String key, override;
|
||||||
List<String> mapping = Strings.splitSmart(rule, mappingSep, false);
|
List<String> mapping = Strings.splitSmart(rule, mappingSep, false);
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.PorterStemFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||||
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
|
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
|
||||||
|
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter;
|
||||||
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
||||||
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
||||||
import org.apache.lucene.analysis.en.KStemFilter;
|
import org.apache.lucene.analysis.en.KStemFilter;
|
||||||
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||||
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
||||||
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
||||||
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
||||||
|
|
|
@ -19,7 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnal
|
||||||
@Inject
|
@Inject
|
||||||
public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
Set<?> stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
||||||
this.stopAnalyzer = new StopAnalyzer(version, stopWords);
|
this.stopAnalyzer = new StopAnalyzer(version, stopWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,9 +19,10 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.StopAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.core.StopFilter;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -37,7 +38,7 @@ import java.util.Set;
|
||||||
*/
|
*/
|
||||||
public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final Set<?> stopWords;
|
private final CharArraySet stopWords;
|
||||||
|
|
||||||
private final boolean ignoreCase;
|
private final boolean ignoreCase;
|
||||||
|
|
||||||
|
@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
@Inject
|
@Inject
|
||||||
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version);
|
|
||||||
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
|
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
|
||||||
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29));
|
this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase);
|
||||||
|
// LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined
|
||||||
|
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase);
|
StopFilter filter = new StopFilter(version, tokenStream, stopWords);
|
||||||
filter.setEnablePositionIncrements(enablePositionIncrements);
|
filter.setEnablePositionIncrements(enablePositionIncrements);
|
||||||
return filter;
|
return filter;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Swedi
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new SwedishAnalyzer(version,
|
analyzer = new SwedishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.analysis.*;
|
||||||
|
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||||
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
|
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
|
||||||
import org.apache.lucene.analysis.synonym.SynonymFilter;
|
import org.apache.lucene.analysis.synonym.SynonymFilter;
|
||||||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||||
|
@ -78,7 +80,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
}
|
}
|
||||||
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);
|
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);
|
||||||
|
|
||||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
Analyzer analyzer = new Analyzer() {
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader);
|
Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader);
|
||||||
|
|
|
@ -19,8 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -40,7 +40,7 @@ public class TurkishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Turki
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
analyzer = new TurkishAnalyzer(version,
|
analyzer = new TurkishAnalyzer(version,
|
||||||
Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version),
|
Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet(), version),
|
||||||
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
|
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -45,7 +45,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
|
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(version, reader);
|
||||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||||
return tokenizer;
|
return tokenizer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
|
@ -19,10 +19,10 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
|
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
package org.elasticsearch.index.analysis.compound;
|
package org.elasticsearch.index.analysis.compound;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
|
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
|
||||||
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
|
@ -30,8 +31,6 @@ import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.Analysis;
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains the common configuration settings between subclasses of this class.
|
* Contains the common configuration settings between subclasses of this class.
|
||||||
*/
|
*/
|
||||||
|
@ -41,7 +40,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
|
||||||
protected final int minSubwordSize;
|
protected final int minSubwordSize;
|
||||||
protected final int maxSubwordSize;
|
protected final int maxSubwordSize;
|
||||||
protected final boolean onlyLongestMatch;
|
protected final boolean onlyLongestMatch;
|
||||||
protected final Set<?> wordList;
|
protected final CharArraySet wordList;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
|
Loading…
Reference in New Issue