Add "articles" setting to specify a list of stopwords for the ElisionFilter.
This commit is contained in:
parent
573114a446
commit
18dd9a5f18
|
@ -118,6 +118,29 @@ public class Analysis {
|
||||||
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
|
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
|
||||||
.immutableMap();
|
.immutableMap();
|
||||||
|
|
||||||
|
public static Set<?> parseArticles(Environment env, Settings settings) {
|
||||||
|
String value = settings.get("articles");
|
||||||
|
if (value != null) {
|
||||||
|
if ("_none_".equals(value)) {
|
||||||
|
return ImmutableSet.of();
|
||||||
|
} else {
|
||||||
|
return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String[] articles = settings.getAsArray("articles", null);
|
||||||
|
if (articles != null) {
|
||||||
|
Set setArticles = new HashSet<String>(Arrays.asList(articles));
|
||||||
|
return setArticles;
|
||||||
|
}
|
||||||
|
Set<String> pathLoadedArticles = getWordSet(env, settings, "articles");
|
||||||
|
if (pathLoadedArticles != null) {
|
||||||
|
Set setArticles = new HashSet<String>(pathLoadedArticles);
|
||||||
|
return setArticles;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords) {
|
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords) {
|
||||||
String value = settings.get("stopwords");
|
String value = settings.get("stopwords");
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
|
|
|
@ -24,20 +24,29 @@ import org.apache.lucene.analysis.fr.ElisionFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.settings.IndexSettings;
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author kimchy (Shay Banon)
|
* @author kimchy (Shay Banon)
|
||||||
*/
|
*/
|
||||||
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
@Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
private final Set<?> articles;
|
||||||
|
|
||||||
|
@Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
|
this.articles = Analysis.parseArticles(env, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public TokenStream create(TokenStream tokenStream) {
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
if (articles == null) {
|
||||||
return new ElisionFilter(version, tokenStream);
|
return new ElisionFilter(version, tokenStream);
|
||||||
|
} else {
|
||||||
|
return new ElisionFilter(version, tokenStream, articles);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue