Add "articles" setting to specify a list of stopwords for the ElisionFilter.

This commit is contained in:
webdevuser 2011-05-19 20:21:25 +02:00 committed by kimchy
parent 573114a446
commit 18dd9a5f18
2 changed files with 34 additions and 2 deletions

View File

@ -118,6 +118,29 @@ public class Analysis {
.put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
.immutableMap();
/**
 * Resolves the set of articles configured under the {@code articles} setting.
 *
 * <p>Sources are consulted in order; the first one that yields a value wins:
 * <ol>
 * <li>the plain string value of {@code articles}: the literal {@code _none_} gives an
 * empty set, any other value is split as a comma-delimited list;</li>
 * <li>the {@code articles} setting read as an array;</li>
 * <li>the word set returned by {@code getWordSet(env, settings, "articles")}
 * (presumably loaded from a file path; confirm against that helper).</li>
 * </ol>
 *
 * @param env      environment passed through to {@code getWordSet}
 * @param settings settings to read the {@code articles} entries from
 * @return the configured articles, or {@code null} when none of the sources provides a
 *         value; callers must handle {@code null}
 */
public static Set<?> parseArticles(Environment env, Settings settings) {
    String value = settings.get("articles");
    if (value != null) {
        if ("_none_".equals(value)) {
            return ImmutableSet.of();
        }
        return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value));
    }

    String[] articles = settings.getAsArray("articles", null);
    if (articles != null) {
        // Typed copy rather than a raw Set, so the element type is not discarded.
        return new HashSet<String>(Arrays.asList(articles));
    }

    Set<String> pathLoadedArticles = getWordSet(env, settings, "articles");
    if (pathLoadedArticles != null) {
        // Copy into a fresh set, exactly as before, instead of handing back the helper's instance.
        return new HashSet<String>(pathLoadedArticles);
    }

    return null;
}
public static Set<?> parseStopWords(Environment env, Settings settings, Set<?> defaultStopWords) {
String value = settings.get("stopwords");
if (value != null) {

View File

@ -24,20 +24,29 @@ import org.apache.lucene.analysis.fr.ElisionFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.Set;
/**
* @author kimchy (Shay Banon)
*/
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
@Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
private final Set<?> articles;
/**
 * Creates the factory and resolves the articles for this filter by calling
 * {@code Analysis.parseArticles(env, settings)}; the result may be {@code null}.
 *
 * @param index         passed to the superclass constructor
 * @param indexSettings passed to the superclass constructor
 * @param env           environment handed to {@code Analysis.parseArticles}
 * @param name          passed to the superclass constructor
 * @param settings      passed to the superclass constructor and used to look up the articles
 */
@Inject
public ElisionTokenFilterFactory(
        Index index,
        @IndexSettings Settings indexSettings,
        Environment env,
        @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.articles = Analysis.parseArticles(env, settings);
}
/**
 * Wraps the given stream in an {@link ElisionFilter}.
 *
 * <p>When no articles were resolved ({@code articles} is {@code null}) the two-argument
 * constructor is used; otherwise the resolved articles are passed as the third argument.
 *
 * @param tokenStream the stream to wrap
 * @return the stream wrapped in an {@code ElisionFilter}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return articles == null
            ? new ElisionFilter(version, tokenStream)
            : new ElisionFilter(version, tokenStream, articles);
}
}