mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 10:25:15 +00:00
With the new Snowball stemmer analyzer, don't fail when the configured language has no default stopwords; just create the analyzer without stopwords.
This commit is contained in:
parent
4a45df88c6
commit
cc5978393b
1
.idea/libraries/lucene.xml
generated
1
.idea/libraries/lucene.xml
generated
@ -17,6 +17,7 @@
|
||||
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/src/java" />
|
||||
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/highlighter/src/java" />
|
||||
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/memory/src/java" />
|
||||
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/snowball/src/java" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
@ -24,6 +24,9 @@ import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
import org.elasticsearch.common.collect.ImmutableMap;
|
||||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.collect.MapBuilder;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
@ -44,39 +47,18 @@ import java.util.Set;
|
||||
* The SnowballAnalyzer comes with a StandardFilter, LowerCaseFilter, StopFilter
|
||||
* and the SnowballFilter.
|
||||
*
|
||||
* @author kimchy (Shay Banon)
|
||||
* @author harryf (Harry Fuecks)
|
||||
*/
|
||||
public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
|
||||
|
||||
private enum SupportedAnalyzer {
|
||||
DUTCH {
|
||||
public Set<?> getStopwords() {
|
||||
return DutchAnalyzer.getDefaultStopSet();
|
||||
}
|
||||
},
|
||||
ENGLISH {
|
||||
public Set<?> getStopwords() {
|
||||
return StopAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
}
|
||||
},
|
||||
FRENCH {
|
||||
public Set<?> getStopwords() {
|
||||
return FrenchAnalyzer.getDefaultStopSet();
|
||||
}
|
||||
},
|
||||
GERMAN {
|
||||
public Set<?> getStopwords() {
|
||||
return GermanAnalyzer.getDefaultStopSet();
|
||||
}
|
||||
},
|
||||
GERMAN2 {
|
||||
public Set<?> getStopwords() {
|
||||
return GermanAnalyzer.getDefaultStopSet();
|
||||
}
|
||||
};
|
||||
|
||||
public abstract Set<?> getStopwords();
|
||||
}
|
||||
private static final ImmutableMap<String, Set<?>> defaultLanguageStopwords = MapBuilder.<String, Set<?>>newMapBuilder()
|
||||
.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)
|
||||
.put("Dutch", DutchAnalyzer.getDefaultStopSet())
|
||||
.put("German", GermanAnalyzer.getDefaultStopSet())
|
||||
.put("German2", GermanAnalyzer.getDefaultStopSet())
|
||||
.put("French", FrenchAnalyzer.getDefaultStopSet())
|
||||
.immutableMap();
|
||||
|
||||
private final SnowballAnalyzer analyzer;
|
||||
|
||||
@ -84,8 +66,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
|
||||
super(index, indexSettings, name);
|
||||
|
||||
String language = settings.get("language", "English");
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings,
|
||||
SupportedAnalyzer.valueOf(language.toUpperCase()).getStopwords());
|
||||
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, defaultStopwords);
|
||||
|
||||
analyzer = new SnowballAnalyzer(Lucene.VERSION, language, stopWords);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user