Expose recursion level for Hunspell token filter. Closes #3369

This commit is contained in:
Lukas Vlcek 2013-07-24 13:59:08 +02:00 committed by Alexander Reelsen
parent 6101cbf2bf
commit 4f0080b206
3 changed files with 59 additions and 6 deletions

View File

@ -37,6 +37,7 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
private final HunspellDictionary dictionary; private final HunspellDictionary dictionary;
private final boolean dedup; private final boolean dedup;
private final int recursionLevel;
@Inject @Inject
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) { public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
@ -53,15 +54,24 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
} }
dedup = settings.getAsBoolean("dedup", true); dedup = settings.getAsBoolean("dedup", true);
recursionLevel = settings.getAsInt("recursion_level", 2);
if (recursionLevel < 0) {
throw new ElasticSearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
}
} }
@Override @Override
public TokenStream create(TokenStream tokenStream) { public TokenStream create(TokenStream tokenStream) {
return new HunspellStemFilter(tokenStream, dictionary, dedup); return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
} }
public boolean dedup() { public boolean dedup() {
return dedup; return dedup;
} }
/**
 * Returns the recursion level held by this factory, i.e. the value passed as the
 * last argument to each {@code HunspellStemFilter} built in {@code create}.
 *
 * @return the configured recursion level
 */
public int recursionLevel() {
return recursionLevel;
}
} }

View File

@ -75,7 +75,7 @@ public class HunspellService extends AbstractComponent {
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter(); private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
private final LoadingCache<String, HunspellDictionary> dictionaries; private final LoadingCache<String, HunspellDictionary> dictionaries;
private final Map<String, HunspellDictionary> knownDicitionaries; private final Map<String, HunspellDictionary> knownDictionaries;
private final boolean defaultIgnoreCase; private final boolean defaultIgnoreCase;
private final boolean defaultStrictAffixParsing; private final boolean defaultStrictAffixParsing;
@ -86,9 +86,9 @@ public class HunspellService extends AbstractComponent {
} }
@Inject @Inject
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDicitionaries) { public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
super(settings); super(settings);
this.knownDicitionaries = knownDicitionaries; this.knownDictionaries = knownDictionaries;
this.hunspellDir = resolveHunspellDirectory(settings, env); this.hunspellDir = resolveHunspellDirectory(settings, env);
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false); this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false); this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
@ -96,7 +96,7 @@ public class HunspellService extends AbstractComponent {
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() { dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
@Override @Override
public HunspellDictionary load(String locale) throws Exception { public HunspellDictionary load(String locale) throws Exception {
HunspellDictionary dictionary = knownDicitionaries.get(locale); HunspellDictionary dictionary = knownDictionaries.get(locale);
if (dictionary == null) { if (dictionary == null) {
dictionary = loadDictionary(locale, settings, env, version); dictionary = loadDictionary(locale, settings, env, version);
} }
@ -146,7 +146,7 @@ public class HunspellService extends AbstractComponent {
* @param env The node environment (from which the conf path will be resolved) * @param env The node environment (from which the conf path will be resolved)
* @param version The lucene version * @param version The lucene version
* @return The loaded Hunspell dictionary * @return The loaded Hunspell dictionary
* @throws Exception when loading fails (due to IO erros or malformed dictionary files) * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
*/ */
private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception { private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.test.unit.index.analysis; package org.elasticsearch.test.unit.index.analysis;
import org.elasticsearch.common.inject.ProvisionException;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory; import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
@ -63,4 +64,46 @@ public class HunspellTokenFilterFactoryTests {
assertThat(hunspellTokenFilter.dedup(), is(false)); assertThat(hunspellTokenFilter.dedup(), is(false));
} }
/**
 * Builds an analysis service with an "en_US" hunspell filter that does not set
 * {@code recursion_level}, then checks that the resulting factory reports a
 * recursion level of 2.
 */
@Test
public void testDefaultRecursionLevel() throws IOException {
Settings settings = settingsBuilder()
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
.put("index.analysis.filter.en_US.type", "hunspell")
// recursion_level is deliberately left out so the factory falls back to its default
.put("index.analysis.filter.en_US.locale", "en_US")
.build();
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
assertThat(hunspellTokenFilter.recursionLevel(), is(2));
}
/**
 * Builds an analysis service with an "en_US" hunspell filter whose
 * {@code recursion_level} is explicitly set to 0, then checks that the factory
 * reports that value instead of its default.
 */
@Test
public void testCustomRecursionLevel() throws IOException {
Settings settings = settingsBuilder()
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
.put("index.analysis.filter.en_US.type", "hunspell")
// 0 passes the factory's "recursionLevel < 0" rejection check
.put("index.analysis.filter.en_US.recursion_level", 0)
.put("index.analysis.filter.en_US.locale", "en_US")
.build();
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
assertThat(hunspellTokenFilter.recursionLevel(), is(0));
}
/**
 * Configures an "en_US" hunspell filter with {@code recursion_level} set to -1
 * and expects building the analysis service to fail with a
 * {@link ProvisionException}.
 *
 * NOTE(review): the factory constructor throws ElasticSearchIllegalArgumentException
 * for negative values; presumably the injector wraps it in ProvisionException,
 * which is why that type is expected here -- confirm.
 */
@Test(expected = ProvisionException.class)
public void negativeRecursionLevelShouldFail() throws IOException {
Settings settings = settingsBuilder()
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
.put("index.analysis.filter.en_US.type", "hunspell")
.put("index.analysis.filter.en_US.recursion_level", -1)
.put("index.analysis.filter.en_US.locale", "en_US")
.build();
// No assertions follow: the call itself is expected to throw.
AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
}
} }