From 18dd9a5f183d95914791784f8e475ea5c1f7945d Mon Sep 17 00:00:00 2001
From: webdevuser
Date: Thu, 19 May 2011 20:21:25 +0200
Subject: [PATCH] Add "articles" setting to specify a list of stopwords for the ElisionFilter.

---
Review revision: parseArticles() now returns immutable copies on every
non-null path and is documented. Line breaks and indentation were restored by
hand, and the blob ids on the index lines predate this revision.

 .../index/analysis/Analysis.java              | 31 +++++++++++++++++++++++++++++++
 .../analysis/ElisionTokenFilterFactory.java   | 13 +++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java
index d0b98062d3c..2f3b592ab42 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java
@@ -118,6 +118,37 @@ public class Analysis {
             .put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
             .immutableMap();
 
+    /**
+     * Resolves the articles for the elision filter from the "articles" setting.
+     *
+     * The value is read as a plain string first: "_none_" selects an empty set and
+     * any other string is split on commas. Failing that, the setting is read as an
+     * array, and finally it is handed to getWordSet(env, settings, "articles").
+     *
+     * @param env      environment handed to getWordSet
+     * @param settings settings of the token filter being built
+     * @return an immutable set of articles, or null when none are configured; the
+     *         elision filter factory then builds its filter without an article set
+     */
+    public static Set parseArticles(Environment env, Settings settings) {
+        String value = settings.get("articles");
+        if (value != null) {
+            if ("_none_".equals(value)) {
+                return ImmutableSet.of();
+            }
+            return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
+        }
+        String[] articles = settings.getAsArray("articles", null);
+        if (articles != null) {
+            return ImmutableSet.copyOf(Arrays.asList(articles));
+        }
+        Set pathLoadedArticles = getWordSet(env, settings, "articles");
+        if (pathLoadedArticles != null) {
+            return ImmutableSet.copyOf(pathLoadedArticles);
+        }
+        return null;
+    }
+
     public static Set parseStopWords(Environment env, Settings settings, Set defaultStopWords) {
         String value = settings.get("stopwords");
         if (value != null) {
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java
index 919b29c87a6..a01f12a4510 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java
@@ -24,20 +24,29 @@ import org.apache.lucene.analysis.fr.ElisionFilter;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
 
+import java.util.Set;
 
 /**
  * @author kimchy (Shay Banon)
  */
 public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
 
-    @Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+    // Articles to elide; null when the "articles" setting is absent.
+    private final Set articles;
+
+    @Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
+        this.articles = Analysis.parseArticles(env, settings);
     }
 
     @Override public TokenStream create(TokenStream tokenStream) {
-        return new ElisionFilter(version, tokenStream);
+        if (articles == null) {
+            return new ElisionFilter(version, tokenStream);
+        }
+        return new ElisionFilter(version, tokenStream, articles);
     }
 }
\ No newline at end of file