Support for hunspell token filter

Closes #646
- Introduced HunspellService, which holds a repository of hunspell dictionaries
- A dictionary can be registered either via a plugin or by placing the dictionary files on the file system

Parent commit: 720feca3c5
This commit: 6c4108b38a
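
As a companion to the commit message above, the sketch below shows how the plugin registration path could look. It is only a sketch, not part of this commit: it leans on the addHunspellDictionary(String, HunspellDictionary) method and the HunspellDictionary constructor arguments that appear in the diff below, while the plugin class name, the dictionary file paths, and the use of the AbstractPlugin base class with an onModule hook are assumptions.

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.plugins.AbstractPlugin;

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Collections;

// Hypothetical plugin that registers a pre-built hunspell dictionary under the "en_US" locale.
public class MyHunspellPlugin extends AbstractPlugin {

    public String name() {
        return "my-hunspell-plugin";
    }

    public String description() {
        return "registers a hunspell dictionary programmatically";
    }

    // Assumption: the plugin infrastructure calls onModule(...) for each module it builds.
    public void onModule(IndicesAnalysisModule module) {
        try {
            InputStream affix = new FileInputStream("/path/to/en_US.aff"); // affix rules (placeholder path)
            InputStream words = new FileInputStream("/path/to/en_US.dic"); // dictionary entries (placeholder path)
            // Same constructor shape as used by HunspellService.loadDictionary() in this commit:
            // (affix stream, dictionary streams, lucene version, ignore_case, strict_affix_parsing)
            HunspellDictionary dictionary = new HunspellDictionary(
                    affix, Collections.singletonList(words), Lucene.ANALYZER_VERSION, true, false);
            module.addHunspellDictionary("en_US", dictionary);
        } catch (Exception e) {
            throw new RuntimeException("could not build hunspell dictionary", e);
        }
    }
}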
AnalysisModule.java
@@ -496,6 +496,8 @@ public class AnalysisModule extends AbstractModule {
        tokenFiltersBindings.processTokenFilter("keyword_marker", KeywordMarkerTokenFilterFactory.class);
        tokenFiltersBindings.processTokenFilter("stemmer_override", StemmerOverrideTokenFilterFactory.class);

        tokenFiltersBindings.processTokenFilter("hunspell", HunspellTokenFilterFactory.class);
    }

    @Override
HunspellTokenFilterFactory.java (new file)
@@ -0,0 +1,60 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.analysis.HunspellService;

@AnalysisSettingsRequired
public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {

    private final HunspellDictionary dictionary;
    private final boolean dedup;

    @Inject
    public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
        super(index, indexSettings, name, settings);

        String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
        if (locale == null) {
            throw new ElasticSearchIllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
        }

        dictionary = hunspellService.getDictionary(locale);
        if (dictionary == null) {
            throw new ElasticSearchIllegalArgumentException(String.format("Unknown hunspell dictionary for locale [%s]", locale));
        }

        dedup = settings.getAsBoolean("dedup", false);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new HunspellStemFilter(tokenStream, dictionary, dedup);
    }
}
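
With the factory above in place, an index can wire the new "hunspell" filter into an analyzer through its analysis settings. The sketch below is illustrative only: the locale and dedup keys are the ones the factory reads, while the filter name, analyzer name, and the index.analysis.* key layout follow the usual index analysis configuration conventions and are not part of this commit.

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;

public class HunspellFilterSettingsExample {
    public static void main(String[] args) {
        // Illustrative index settings wiring the "hunspell" token filter into a custom analyzer.
        Settings indexSettings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.my_en_stemmer.type", "hunspell")   // name registered in AnalysisModule
                .put("index.analysis.filter.my_en_stemmer.locale", "en_US")    // read by HunspellTokenFilterFactory
                .put("index.analysis.filter.my_en_stemmer.dedup", true)        // drop duplicate stems
                .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
                .putArray("index.analysis.analyzer.my_analyzer.filter", "lowercase", "my_en_stemmer")
                .build();
        System.out.println(indexSettings.getAsMap());
    }
}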
HunspellService.java (new file)
@@ -0,0 +1,258 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.indices.analysis;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;

import java.io.*;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Serves as a node level registry for hunspell dictionaries. This service expects all dictionaries to be located under
 * the {@code <path.conf>/hunspell} directory, where each locale has its dedicated sub-directory which holds the dictionary
 * files. For example, the dictionary files for the {@code en_US} locale must be placed under the {@code <path.conf>/hunspell/en_US}
 * directory.
 *
 * The following settings can be set for each dictionary:
 * <ul>
 *     <li>{@code ignore_case} - If true, dictionary matching will be case insensitive (defaults to {@code false})</li>
 *     <li>{@code strict_affix_parsing} - Determines whether errors while reading an affix rules file will cause an exception or simply be ignored (defaults to {@code true})</li>
 * </ul>
 *
 * These settings can either be configured as node level configuration, such as:
 * <br/><br/>
 * <pre><code>
 *     indices.analysis.hunspell.dictionary.en_US.ignore_case: true
 *     indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing: false
 * </code></pre>
 *
 * or, as dedicated configuration per dictionary, placed in a {@code settings.yml} file under the dictionary directory. For
 * example, the following can be the content of the {@code <path.conf>/hunspell/en_US/settings.yml} file:
 * <br/><br/>
 * <pre><code>
 *     ignore_case: true
 *     strict_affix_parsing: false
 * </code></pre>
 *
 * @see org.elasticsearch.index.analysis.HunspellTokenFilterFactory
 */
public class HunspellService extends AbstractComponent {

    private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
    private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();

    private final LoadingCache<String, HunspellDictionary> dictionaries;
    private final Map<String, HunspellDictionary> knownDictionaries;

    private final boolean defaultIgnoreCase;
    private final boolean defaultStrictAffixParsing;
    private final File hunspellDir;

    public HunspellService(final Settings settings, final Environment env) {
        this(settings, env, Collections.<String, HunspellDictionary>emptyMap());
    }

    @Inject
    public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
        super(settings);
        this.knownDictionaries = knownDictionaries;
        this.hunspellDir = resolveHunspellDirectory(settings, env);
        this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
        this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
        final Version version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
        dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
            @Override
            public HunspellDictionary load(String locale) throws Exception {
                HunspellDictionary dictionary = knownDictionaries.get(locale);
                if (dictionary == null) {
                    dictionary = loadDictionary(locale, settings, env, version);
                }
                return dictionary;
            }
        });
        scanAndLoadDictionaries();
    }

    /**
     * Returns the hunspell dictionary for the given locale.
     *
     * @param locale The name of the locale
     */
    public HunspellDictionary getDictionary(String locale) {
        return dictionaries.getUnchecked(locale);
    }

    private File resolveHunspellDirectory(Settings settings, Environment env) {
        String location = settings.get("indices.analysis.hunspell.dictionary.location", null);
        if (location != null) {
            return new File(location);
        }
        return new File(env.configFile(), "hunspell");
    }

    /**
     * Scans the hunspell directory and loads all found dictionaries
     */
    private void scanAndLoadDictionaries() {
        if (hunspellDir.exists() && hunspellDir.isDirectory()) {
            for (File file : hunspellDir.listFiles()) {
                if (file.isDirectory()) {
                    if (file.list(AFFIX_FILE_FILTER).length > 0) { // just making sure it's indeed a dictionary dir
                        dictionaries.getUnchecked(file.getName());
                    }
                }
            }
        }
    }

    /**
     * Loads the hunspell dictionary for the given locale.
     *
     * @param locale       The locale of the hunspell dictionary to be loaded.
     * @param nodeSettings The node level settings
     * @param env          The node environment (from which the conf path will be resolved)
     * @param version      The lucene version
     * @return The loaded Hunspell dictionary
     * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
     */
    private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
        if (logger.isDebugEnabled()) {
            logger.debug("Loading hunspell dictionary [{}]...", locale);
        }
        File hunspellConfDir = new File(env.configFile(), "hunspell");
        File dicDir = new File(hunspellConfDir, locale);
        if (!dicDir.exists() || !dicDir.isDirectory()) {
            throw new ElasticSearchException(String.format("Could not find hunspell dictionary [%s]", locale));
        }

        // merging node settings with hunspell dictionary specific settings
        nodeSettings = loadDictionarySettings(dicDir, nodeSettings.getByPrefix("indices.analysis.hunspell.dictionary." + locale + "."));

        boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
        boolean strictAffixParsing = nodeSettings.getAsBoolean("strict_affix_parsing", defaultStrictAffixParsing);

        File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
        if (affixFiles.length != 1) {
            throw new ElasticSearchException(String.format("Missing affix file for hunspell dictionary [%s]", locale));
        }
        InputStream affixStream = null;

        File[] dicFiles = dicDir.listFiles(DIC_FILE_FILTER);
        List<InputStream> dicStreams = new ArrayList<InputStream>(dicFiles.length);
        try {

            for (int i = 0; i < dicFiles.length; i++) {
                dicStreams.add(new FileInputStream(dicFiles[i]));
            }

            affixStream = new FileInputStream(affixFiles[0]);

            return new HunspellDictionary(affixStream, dicStreams, version, ignoreCase, strictAffixParsing);

        } catch (Exception e) {
            logger.error("Could not load hunspell dictionary [{}]", e, locale);
            throw e;
        } finally {
            if (affixStream != null) {
                try {
                    affixStream.close();
                } catch (IOException e) {
                    // nothing much we can do here
                }
            }
            for (InputStream in : dicStreams) {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // nothing much we can do here
                    }
                }
            }
        }
    }

    /**
     * Each hunspell dictionary directory may contain a {@code settings.yml} which holds dictionary specific settings. Default
     * values for these settings are defined in the given default settings.
     *
     * @param dir      The directory of the dictionary
     * @param defaults The default settings for this dictionary
     * @return The resolved settings.
     */
    private static Settings loadDictionarySettings(File dir, Settings defaults) {
        File file = new File(dir, "settings.yml");
        if (file.exists()) {
            try {
                return ImmutableSettings.settingsBuilder().loadFromUrl(file.toURI().toURL()).put(defaults).build();
            } catch (MalformedURLException e) {
                throw new ElasticSearchException(String.format("Could not load hunspell dictionary settings from [%s]", file.getAbsolutePath()), e);
            }
        }

        file = new File(dir, "settings.json");
        if (file.exists()) {
            try {
                return ImmutableSettings.settingsBuilder().loadFromUrl(file.toURI().toURL()).put(defaults).build();
            } catch (MalformedURLException e) {
                throw new ElasticSearchException(String.format("Could not load hunspell dictionary settings from [%s]", file.getAbsolutePath()), e);
            }
        }

        return defaults;
    }

    /**
     * Only accepts {@code *.dic} files
     */
    static class DictionaryFileFilter implements FilenameFilter {
        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(".dic");
        }
    }

    /**
     * Only accepts {@code *.aff} files
     */
    static class AffixFileFilter implements FilenameFilter {
        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(".aff");
        }
    }

}
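
For a quick, self-contained look at the service outside a full node, the sketch below constructs a HunspellService directly and fetches the en_US dictionary. It is a sketch only: the config path is a placeholder and must contain a hunspell/en_US sub-directory with the .aff and .dic files, and the Environment(Settings) constructor is assumed to be available.

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.indices.analysis.HunspellService;

public class HunspellServiceExample {
    public static void main(String[] args) {
        // Placeholder path; <path.conf>/hunspell/en_US must hold en_US.aff and en_US.dic.
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("path.conf", "/path/to/config")
                .put("indices.analysis.hunspell.dictionary.en_US.ignore_case", true)
                .build();

        // Assumption: Environment can be built from Settings so path.conf is picked up.
        HunspellService service = new HunspellService(settings, new Environment(settings));

        // Dictionaries found on disk are loaded during construction and cached; this is a cache lookup.
        HunspellDictionary dictionary = service.getDictionary("en_US");
        System.out.println("ignore_case: " + dictionary.isIgnoreCase());
    }
}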
IndicesAnalysisModule.java
@@ -19,12 +19,29 @@

package org.elasticsearch.indices.analysis;

import com.google.common.collect.Maps;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.common.inject.multibindings.MapBinder;

import java.util.Map;

public class IndicesAnalysisModule extends AbstractModule {

    private final Map<String, HunspellDictionary> hunspellDictionaries = Maps.newHashMap();

    public void addHunspellDictionary(String lang, HunspellDictionary dictionary) {
        hunspellDictionaries.put(lang, dictionary);
    }

    @Override
    protected void configure() {
        bind(IndicesAnalysisService.class).asEagerSingleton();

        MapBinder<String, HunspellDictionary> dictionariesBinder = MapBinder.newMapBinder(binder(), String.class, HunspellDictionary.class);
        for (Map.Entry<String, HunspellDictionary> entry : hunspellDictionaries.entrySet()) {
            dictionariesBinder.addBinding(entry.getKey()).toInstance(entry.getValue());
        }
        bind(HunspellService.class).asEagerSingleton();
    }
}
HunspellServiceTests.java (new file)
@@ -0,0 +1,92 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.integration.indices.analyze;

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.HunspellService;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.internal.InternalNode;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;

/**
 *
 */
public class HunspellServiceTests extends AbstractNodesTests {

    @AfterClass
    public void closeNodes() {
        closeAllNodes();
    }

    @Test
    public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
                .put("indices.analysis.hunspell.dictionary.lazy", true)
                .put("indices.analysis.hunspell.dictionary.ignore_case", true)
                .build();

        Node node = startNode("node1", settings);

        HunspellDictionary dictionary = ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary("en_US");
        assertThat(dictionary, notNullValue());
        Version expectedVersion = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
    }

    @Test
    public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
                .put("indices.analysis.hunspell.dictionary.lazy", true)
                .put("indices.analysis.hunspell.dictionary.ignore_case", true)
                .put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
                .put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
                .build();

        Node node = startNode("node1", settings);

        HunspellDictionary dictionary = ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary("en_US");
        assertThat(dictionary, notNullValue());
        Version expectedVersion = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(false));

        // testing that dictionary specific settings override node level settings
        dictionary = ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary("en_US_custom");
        assertThat(dictionary, notNullValue());
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
    }

}
src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.aff (new executable file, 201 lines)
@@ -0,0 +1,201 @@
SET ISO8859-1
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
NOSUGGEST !

# ordinal numbers
COMPOUNDMIN 1
# only in compounds: 1th, 2th, 3th
ONLYINCOMPOUND c
# compound rules:
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp
WORDCHARS 0123456789

PFX A Y 1
PFX A 0 re .

PFX I Y 1
PFX I 0 in .

PFX U Y 1
PFX U 0 un .

PFX C Y 1
PFX C 0 de .

PFX E Y 1
PFX E 0 dis .

PFX F Y 1
PFX F 0 con .

PFX K Y 1
PFX K 0 pro .

SFX V N 2
SFX V e ive e
SFX V 0 ive [^e]

SFX N Y 3
SFX N e ion e
SFX N y ication y
SFX N 0 en [^ey]

SFX X Y 3
SFX X e ions e
SFX X y ications y
SFX X 0 ens [^ey]

SFX H N 2
SFX H y ieth y
SFX H 0 th [^y]

SFX Y Y 1
SFX Y 0 ly .

SFX G Y 2
SFX G e ing e
SFX G 0 ing [^e]

SFX J Y 2
SFX J e ings e
SFX J 0 ings [^e]

SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y

SFX T N 4
SFX T 0 st e
SFX T y iest [^aeiou]y
SFX T 0 est [aeiou]y
SFX T 0 est [^ey]

SFX R Y 4
SFX R 0 r e
SFX R y ier [^aeiou]y
SFX R 0 er [aeiou]y
SFX R 0 er [^ey]

SFX Z Y 4
SFX Z 0 rs e
SFX Z y iers [^aeiou]y
SFX Z 0 ers [aeiou]y
SFX Z 0 ers [^ey]

SFX S Y 4
SFX S y ies [^aeiou]y
SFX S 0 s [aeiou]y
SFX S 0 es [sxzh]
SFX S 0 s [^sxzhy]

SFX P Y 3
SFX P y iness [^aeiou]y
SFX P 0 ness [aeiou]y
SFX P 0 ness [^y]

SFX M Y 1
SFX M 0 's .

SFX B Y 3
SFX B 0 able [^aeiou]
SFX B 0 able ee
SFX B e able [^aeiou]e

SFX L Y 1
SFX L 0 ment .

REP 88
REP a ei
REP ei a
REP a ey
REP ey a
REP ai ie
REP ie ai
REP are air
REP are ear
REP are eir
REP air are
REP air ere
REP ere air
REP ere ear
REP ere eir
REP ear are
REP ear air
REP ear ere
REP eir are
REP eir ere
REP ch te
REP te ch
REP ch ti
REP ti ch
REP ch tu
REP tu ch
REP ch s
REP s ch
REP ch k
REP k ch
REP f ph
REP ph f
REP gh f
REP f gh
REP i igh
REP igh i
REP i uy
REP uy i
REP i ee
REP ee i
REP j di
REP di j
REP j gg
REP gg j
REP j ge
REP ge j
REP s ti
REP ti s
REP s ci
REP ci s
REP k cc
REP cc k
REP k qu
REP qu k
REP kw qu
REP o eau
REP eau o
REP o ew
REP ew o
REP oo ew
REP ew oo
REP ew ui
REP ui ew
REP oo ui
REP ui oo
REP ew u
REP u ew
REP oo u
REP u oo
REP u oe
REP oe u
REP u ieu
REP ieu u
REP ue ew
REP ew ue
REP uff ough
REP oo ieu
REP ieu oo
REP ier ear
REP ear ier
REP ear air
REP air ear
REP w qu
REP qu w
REP z ss
REP ss z
REP shun tion
REP shun sion
REP shun cion
src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.dic (new executable file, 62120 lines)
File diff suppressed because it is too large.
src/test/resources/indices/analyze/conf_dir/hunspell/en_US_custom/en_US.aff (new executable file, 201 lines)
Content is identical, line for line, to src/test/resources/indices/analyze/conf_dir/hunspell/en_US/en_US.aff shown above.
src/test/resources/indices/analyze/conf_dir/hunspell/en_US_custom/en_US.dic (new executable file, 62120 lines)
File diff suppressed because it is too large.
@@ -0,0 +1,2 @@
ignore_case: true
strict_affix_parsing: true