With the new stemmer analyzer, don't break when there are no stopwords for the language; just create the analyzer without stopwords

This commit is contained in:
kimchy 2011-01-06 09:53:56 +02:00
parent 4a45df88c6
commit cc5978393b
2 changed files with 14 additions and 31 deletions

View File

@ -17,6 +17,7 @@
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/highlighter/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/memory/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/snowball/src/java" />
</SOURCES>
</library>
</component>

View File

@ -24,6 +24,9 @@ import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.ImmutableSet;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene;
@ -44,39 +47,18 @@ import java.util.Set;
* The SnowballAnalyzer comes with a StandardFilter, LowerCaseFilter, StopFilter
* and the SnowballFilter.
*
* @author kimchy (Shay Banon)
* @author harryf (Harry Fuecks)
*/
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
/**
 * Languages for which a default stopword set is available.
 */
private enum SupportedAnalyzer {
    DUTCH, ENGLISH, FRENCH, GERMAN, GERMAN2;

    /**
     * Looks up the default stopwords for this language. The lookup happens
     * on every call rather than being cached on the constant.
     *
     * @return the default stop set taken from the matching Lucene analyzer
     */
    public Set<?> getStopwords() {
        switch (this) {
            case DUTCH:
                return DutchAnalyzer.getDefaultStopSet();
            case ENGLISH:
                return StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            case FRENCH:
                return FrenchAnalyzer.getDefaultStopSet();
            case GERMAN:
            case GERMAN2:
                // GERMAN2 shares the German analyzer's stop set.
                return GermanAnalyzer.getDefaultStopSet();
            default:
                // Unreachable: every constant is handled above.
                throw new AssertionError(this);
        }
    }
}
private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder()
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
.put("German", GermanAnalyzer.getDefaultStopSet())
.put("German2", GermanAnalyzer.getDefaultStopSet())
.put("French", FrenchAnalyzer.getDefaultStopSet())
.immutableMap();
private final SnowballAnalyzer analyzer;
@ -84,8 +66,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
super(index, indexSettings, name);
String language = settings.get("language", "English");
Set<?> stopWords = Analysis.parseStopWords(settings,
SupportedAnalyzer.valueOf(language.toUpperCase()).getStopwords());
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
Set<?> stopWords = Analysis.parseStopWords(settings, defaultStopwords);
analyzer = new SnowballAnalyzer(Lucene.VERSION, language, stopWords);
}