From 4f0080b2065091bb2dce4d9100bf1db6b093ae93 Mon Sep 17 00:00:00 2001
From: Lukas Vlcek
Date: Wed, 24 Jul 2013 13:59:08 +0200
Subject: [PATCH] Expose recursion level for Hunspell token filter. Closes #3369

---
Notes (not part of the commit message):
  * Adds a "recursion_level" setting to the hunspell token filter. It is read
    with a default of 2, rejected when negative, and passed to
    HunspellStemFilter as a fourth constructor argument.
  * Renames knownDicitionaries -> knownDictionaries and fixes the "erros"
    Javadoc typo in HunspellService.
  * NOTE(review): this patch was restored from a whitespace-collapsed copy.
    Hunk line counts match the headers, but indentation, Javadoc column
    alignment and the <String, HunspellDictionary> type arguments were
    reconstructed -- confirm against the upstream commit before applying.

 .../analysis/HunspellTokenFilterFactory.java  | 12 +++++-
 .../indices/analysis/HunspellService.java     | 10 ++---
 .../HunspellTokenFilterFactoryTests.java      | 43 +++++++++++++++++++
 3 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java
index b34bdf2349e..3d35f4841e8 100644
--- a/src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java
+++ b/src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java
@@ -37,6 +37,7 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final HunspellDictionary dictionary;
     private final boolean dedup;
+    private final int recursionLevel;
 
     @Inject
     public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
@@ -53,15 +54,24 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
         }
 
         dedup = settings.getAsBoolean("dedup", true);
+
+        recursionLevel = settings.getAsInt("recursion_level", 2);
+        if (recursionLevel < 0) {
+            throw new ElasticSearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
+        }
     }
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        return new HunspellStemFilter(tokenStream, dictionary, dedup);
+        return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
     }
 
     public boolean dedup() {
         return dedup;
     }
 
+    public int recursionLevel() {
+        return recursionLevel;
+    }
+
 }
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/indices/analysis/HunspellService.java b/src/main/java/org/elasticsearch/indices/analysis/HunspellService.java
index 8eceec94d59..9812b129f6f 100644
--- a/src/main/java/org/elasticsearch/indices/analysis/HunspellService.java
+++ b/src/main/java/org/elasticsearch/indices/analysis/HunspellService.java
@@ -75,7 +75,7 @@ public class HunspellService extends AbstractComponent {
     private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
 
     private final LoadingCache<String, HunspellDictionary> dictionaries;
-    private final Map<String, HunspellDictionary> knownDicitionaries;
+    private final Map<String, HunspellDictionary> knownDictionaries;
 
     private final boolean defaultIgnoreCase;
     private final boolean defaultStrictAffixParsing;
@@ -86,9 +86,9 @@ public class HunspellService extends AbstractComponent {
     }
 
     @Inject
-    public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDicitionaries) {
+    public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
         super(settings);
-        this.knownDicitionaries = knownDicitionaries;
+        this.knownDictionaries = knownDictionaries;
         this.hunspellDir = resolveHunspellDirectory(settings, env);
         this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
         this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
@@ -96,7 +96,7 @@ public class HunspellService extends AbstractComponent {
         dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
             @Override
             public HunspellDictionary load(String locale) throws Exception {
-                HunspellDictionary dictionary = knownDicitionaries.get(locale);
+                HunspellDictionary dictionary = knownDictionaries.get(locale);
                 if (dictionary == null) {
                     dictionary = loadDictionary(locale, settings, env, version);
                 }
@@ -146,7 +146,7 @@ public class HunspellService extends AbstractComponent {
      * @param env The node environment (from which the conf path will be resolved)
      * @param version The lucene version
      * @return The loaded Hunspell dictionary
-     * @throws Exception when loading fails (due to IO erros or malformed dictionary files)
+     * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
      */
     private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
         if (logger.isDebugEnabled()) {
diff --git a/src/test/java/org/elasticsearch/test/unit/index/analysis/HunspellTokenFilterFactoryTests.java b/src/test/java/org/elasticsearch/test/unit/index/analysis/HunspellTokenFilterFactoryTests.java
index ef46d9eb801..c3b744a1251 100644
--- a/src/test/java/org/elasticsearch/test/unit/index/analysis/HunspellTokenFilterFactoryTests.java
+++ b/src/test/java/org/elasticsearch/test/unit/index/analysis/HunspellTokenFilterFactoryTests.java
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.test.unit.index.analysis;
 
+import org.elasticsearch.common.inject.ProvisionException;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.analysis.AnalysisService;
 import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
@@ -63,4 +64,46 @@ public class HunspellTokenFilterFactoryTests {
         assertThat(hunspellTokenFilter.dedup(), is(false));
     }
 
+    @Test
+    public void testDefaultRecursionLevel() throws IOException {
+        Settings settings = settingsBuilder()
+                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
+                .put("index.analysis.filter.en_US.type", "hunspell")
+                .put("index.analysis.filter.en_US.locale", "en_US")
+                .build();
+
+        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
+        TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
+        assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+        HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
+        assertThat(hunspellTokenFilter.recursionLevel(), is(2));
+    }
+
+    @Test
+    public void testCustomRecursionLevel() throws IOException {
+        Settings settings = settingsBuilder()
+                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
+                .put("index.analysis.filter.en_US.type", "hunspell")
+                .put("index.analysis.filter.en_US.recursion_level", 0)
+                .put("index.analysis.filter.en_US.locale", "en_US")
+                .build();
+
+        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
+        TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
+        assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
+        HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
+        assertThat(hunspellTokenFilter.recursionLevel(), is(0));
+    }
+
+    @Test(expected = ProvisionException.class)
+    public void negativeRecursionLevelShouldFail() throws IOException {
+        Settings settings = settingsBuilder()
+                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
+                .put("index.analysis.filter.en_US.type", "hunspell")
+                .put("index.analysis.filter.en_US.recursion_level", -1)
+                .put("index.analysis.filter.en_US.locale", "en_US")
+                .build();
+        AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
+    }
+
 }