Expose recursion level for Hunspell token filter. Closes #3369
This commit is contained in:
parent
6101cbf2bf
commit
4f0080b206
|
@@ -37,6 +37,7 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final HunspellDictionary dictionary;
|
private final HunspellDictionary dictionary;
|
||||||
private final boolean dedup;
|
private final boolean dedup;
|
||||||
|
private final int recursionLevel;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
|
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
|
||||||
|
@@ -53,15 +54,24 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
dedup = settings.getAsBoolean("dedup", true);
|
dedup = settings.getAsBoolean("dedup", true);
|
||||||
|
|
||||||
|
recursionLevel = settings.getAsInt("recursion_level", 2);
|
||||||
|
if (recursionLevel < 0) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
return new HunspellStemFilter(tokenStream, dictionary, dedup);
|
return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean dedup() {
|
public boolean dedup() {
|
||||||
return dedup;
|
return dedup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int recursionLevel() {
|
||||||
|
return recursionLevel;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@@ -75,7 +75,7 @@ public class HunspellService extends AbstractComponent {
|
||||||
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
|
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
|
||||||
|
|
||||||
private final LoadingCache<String, HunspellDictionary> dictionaries;
|
private final LoadingCache<String, HunspellDictionary> dictionaries;
|
||||||
private final Map<String, HunspellDictionary> knownDicitionaries;
|
private final Map<String, HunspellDictionary> knownDictionaries;
|
||||||
|
|
||||||
private final boolean defaultIgnoreCase;
|
private final boolean defaultIgnoreCase;
|
||||||
private final boolean defaultStrictAffixParsing;
|
private final boolean defaultStrictAffixParsing;
|
||||||
|
@@ -86,9 +86,9 @@ public class HunspellService extends AbstractComponent {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDicitionaries) {
|
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
|
||||||
super(settings);
|
super(settings);
|
||||||
this.knownDicitionaries = knownDicitionaries;
|
this.knownDictionaries = knownDictionaries;
|
||||||
this.hunspellDir = resolveHunspellDirectory(settings, env);
|
this.hunspellDir = resolveHunspellDirectory(settings, env);
|
||||||
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
|
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
|
||||||
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
|
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
|
||||||
|
@@ -96,7 +96,7 @@ public class HunspellService extends AbstractComponent {
|
||||||
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
|
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
|
||||||
@Override
|
@Override
|
||||||
public HunspellDictionary load(String locale) throws Exception {
|
public HunspellDictionary load(String locale) throws Exception {
|
||||||
HunspellDictionary dictionary = knownDicitionaries.get(locale);
|
HunspellDictionary dictionary = knownDictionaries.get(locale);
|
||||||
if (dictionary == null) {
|
if (dictionary == null) {
|
||||||
dictionary = loadDictionary(locale, settings, env, version);
|
dictionary = loadDictionary(locale, settings, env, version);
|
||||||
}
|
}
|
||||||
|
@@ -146,7 +146,7 @@ public class HunspellService extends AbstractComponent {
|
||||||
* @param env The node environment (from which the conf path will be resolved)
|
* @param env The node environment (from which the conf path will be resolved)
|
||||||
* @param version The lucene version
|
* @param version The lucene version
|
||||||
* @return The loaded Hunspell dictionary
|
* @return The loaded Hunspell dictionary
|
||||||
* @throws Exception when loading fails (due to IO erros or malformed dictionary files)
|
* @throws Exception when loading fails (due to IO errors or malformed dictionary files)
|
||||||
*/
|
*/
|
||||||
private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
|
private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
|
|
|
@@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.test.unit.index.analysis;
|
package org.elasticsearch.test.unit.index.analysis;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.inject.ProvisionException;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.analysis.AnalysisService;
|
import org.elasticsearch.index.analysis.AnalysisService;
|
||||||
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
|
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
|
||||||
|
@@ -63,4 +64,46 @@ public class HunspellTokenFilterFactoryTests {
|
||||||
assertThat(hunspellTokenFilter.dedup(), is(false));
|
assertThat(hunspellTokenFilter.dedup(), is(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultRecursionLevel() throws IOException {
|
||||||
|
Settings settings = settingsBuilder()
|
||||||
|
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
|
||||||
|
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||||
|
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||||
|
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
|
||||||
|
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
|
||||||
|
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
|
||||||
|
assertThat(hunspellTokenFilter.recursionLevel(), is(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCustomRecursionLevel() throws IOException {
|
||||||
|
Settings settings = settingsBuilder()
|
||||||
|
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
|
||||||
|
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||||
|
.put("index.analysis.filter.en_US.recursion_level", 0)
|
||||||
|
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||||
|
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
|
||||||
|
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
|
||||||
|
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
|
||||||
|
assertThat(hunspellTokenFilter.recursionLevel(), is(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = ProvisionException.class)
|
||||||
|
public void negativeRecursionLevelShouldFail() throws IOException {
|
||||||
|
Settings settings = settingsBuilder()
|
||||||
|
.put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
|
||||||
|
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||||
|
.put("index.analysis.filter.en_US.recursion_level", -1)
|
||||||
|
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||||
|
.build();
|
||||||
|
AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue