Support for hunspell token filter

Closes: #646

- Introduced HunspellService which holds a repository of hunspell dictionaries
- It is possible to register a dictionary via a plugin or by placing the dictionary files on the file system
This commit is contained in:
uboness 2013-01-02 03:51:26 +01:00
parent 720feca3c5
commit 6c4108b38a
10 changed files with 125073 additions and 0 deletions

View File

@ -496,6 +496,8 @@ public class AnalysisModule extends AbstractModule {
tokenFiltersBindings.processTokenFilter("keyword_marker", KeywordMarkerTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("stemmer_override", StemmerOverrideTokenFilterFactory.class);
tokenFiltersBindings.processTokenFilter("hunspell", HunspellTokenFilterFactory.class);
}
@Override

View File

@ -0,0 +1,60 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.analysis.HunspellService;
@AnalysisSettingsRequired
@AnalysisSettingsRequired
public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {

    // Dictionary resolved once, at construction time, from the node-level HunspellService.
    private final HunspellDictionary dictionary;

    // When true, duplicate stems emitted by the hunspell filter are collapsed.
    private final boolean dedup;

    @Inject
    public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
        super(index, indexSettings, name, settings);

        String locale = resolveLocale(settings);
        this.dictionary = hunspellService.getDictionary(locale);
        if (this.dictionary == null) {
            throw new ElasticSearchIllegalArgumentException(String.format("Unknown hunspell dictionary for locale [%s]", locale));
        }
        this.dedup = settings.getAsBoolean("dedup", false);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new HunspellStemFilter(tokenStream, dictionary, dedup);
    }

    /**
     * The dictionary locale may be configured under any of three equivalent keys:
     * {@code locale}, {@code language} or {@code lang} (checked in that order).
     *
     * @throws ElasticSearchIllegalArgumentException if none of the keys is set
     */
    private static String resolveLocale(Settings settings) {
        String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
        if (locale == null) {
            throw new ElasticSearchIllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
        }
        return locale;
    }
}

View File

@ -0,0 +1,258 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import java.io.*;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * Serves as a node level registry for hunspell dictionaries. This service expects all dictionaries to be located under
* the {@code <path.conf>/hunspell} directory, where each locale has its dedicated sub-directory which holds the dictionary
* files. For example, the dictionary files for {@code en_US} locale must be placed under {@code <path.conf>/hunspell/en_US}
* directory.
*
* The following settings can be set for each dictionary:
* <ul>
* <li>{@code ignore_case} - If true, dictionary matching will be case insensitive (defaults to {@code false})</li>
 * <li>{@code strict_affix_parsing} - Determines whether errors while reading an affix rules file will cause an exception or simply be ignored (defaults to {@code false})</li>
* </ul>
*
* These settings can either be configured as node level configuration, such as:
* <br/><br/>
* <pre><code>
* indices.analysis.hunspell.dictionary.en_US.ignore_case: true
* indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing: false
* </code></pre>
*
* or, as dedicated configuration per dictionary, placed in a {@code settings.yml} file under the dictionary directory. For
* example, the following can be the content of the {@code <path.config>/hunspell/en_US/settings.yml} file:
* <br/><br/>
* <pre><code>
* ignore_case: true
* strict_affix_parsing: false
* </code></pre>
*
* @see org.elasticsearch.index.analysis.HunspellTokenFilterFactory
*/
public class HunspellService extends AbstractComponent {

    private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
    private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();

    // Caches one dictionary instance per locale; entries are loaded on first access.
    private final LoadingCache<String, HunspellDictionary> dictionaries;

    // Dictionaries registered programmatically (e.g. via plugins); they take precedence
    // over dictionaries found on the file system.
    private final Map<String, HunspellDictionary> knownDictionaries;

    private final boolean defaultIgnoreCase;
    private final boolean defaultStrictAffixParsing;

    // Base directory holding one sub-directory per locale with the .aff/.dic files.
    private final File hunspellDir;

    public HunspellService(final Settings settings, final Environment env) {
        this(settings, env, Collections.<String, HunspellDictionary>emptyMap());
    }

    @Inject
    public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
        super(settings);
        this.knownDictionaries = knownDictionaries;
        this.hunspellDir = resolveHunspellDirectory(settings, env);
        this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
        this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
        final Version version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
        dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
            @Override
            public HunspellDictionary load(String locale) throws Exception {
                // pre-registered dictionaries win over file-system dictionaries
                HunspellDictionary dictionary = knownDictionaries.get(locale);
                if (dictionary == null) {
                    dictionary = loadDictionary(locale, settings, version);
                }
                return dictionary;
            }
        });
        scanAndLoadDictionaries();
    }

    /**
     * Returns the hunspell dictionary for the given locale, loading it on first access.
     * Never returns {@code null}: if the dictionary cannot be found or loaded, the loading
     * cache propagates an unchecked exception instead.
     *
     * @param locale The name of the locale
     */
    public HunspellDictionary getDictionary(String locale) {
        return dictionaries.getUnchecked(locale);
    }

    /**
     * Resolves the base hunspell directory, honoring an explicit
     * {@code indices.analysis.hunspell.dictionary.location} setting when present and
     * falling back to {@code <path.conf>/hunspell} otherwise.
     */
    private File resolveHunspellDirectory(Settings settings, Environment env) {
        String location = settings.get("indices.analysis.hunspell.dictionary.location", null);
        if (location != null) {
            return new File(location);
        }
        return new File(env.configFile(), "hunspell");
    }

    /**
     * Scans the hunspell directory and loads all found dictionaries
     */
    private void scanAndLoadDictionaries() {
        if (hunspellDir.exists() && hunspellDir.isDirectory()) {
            File[] localeDirs = hunspellDir.listFiles();
            if (localeDirs == null) { // listFiles() returns null on I/O error
                return;
            }
            for (File file : localeDirs) {
                if (file.isDirectory()) {
                    String[] affixFiles = file.list(AFFIX_FILE_FILTER);
                    if (affixFiles != null && affixFiles.length > 0) { // just making sure it's indeed a dictionary dir
                        dictionaries.getUnchecked(file.getName());
                    }
                }
            }
        }
    }

    /**
     * Loads the hunspell dictionary for the given locale.
     *
     * @param locale       The locale of the hunspell dictionary to be loaded.
     * @param nodeSettings The node level settings
     * @param version      The lucene version
     * @return The loaded Hunspell dictionary
     * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
     */
    private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Version version) throws Exception {
        if (logger.isDebugEnabled()) {
            logger.debug("Loading hunspell dictionary [{}]...", locale);
        }
        // resolve against the configured hunspell directory (rather than hard-coding
        // <path.conf>/hunspell) so a custom "dictionary.location" setting is honored here too
        File dicDir = new File(hunspellDir, locale);
        if (!dicDir.exists() || !dicDir.isDirectory()) {
            throw new ElasticSearchException(String.format("Could not find hunspell dictionary [%s]", locale));
        }

        // merging node settings with hunspell dictionary specific settings
        nodeSettings = loadDictionarySettings(dicDir, nodeSettings.getByPrefix("indices.analysis.hunspell.dictionary." + locale + "."));

        boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
        boolean strictAffixParsing = nodeSettings.getAsBoolean("strict_affix_parsing", defaultStrictAffixParsing);

        File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
        if (affixFiles == null || affixFiles.length != 1) { // exactly one .aff file is expected
            throw new ElasticSearchException(String.format("Missing affix file for hunspell dictionary [%s]", locale));
        }
        InputStream affixStream = null;

        File[] dicFiles = dicDir.listFiles(DIC_FILE_FILTER);
        if (dicFiles == null) { // guard against I/O errors from listFiles()
            dicFiles = new File[0];
        }
        List<InputStream> dicStreams = new ArrayList<InputStream>(dicFiles.length);
        try {
            for (int i = 0; i < dicFiles.length; i++) {
                dicStreams.add(new FileInputStream(dicFiles[i]));
            }
            affixStream = new FileInputStream(affixFiles[0]);
            return new HunspellDictionary(affixStream, dicStreams, version, ignoreCase, strictAffixParsing);
        } catch (Exception e) {
            logger.error("Could not load hunspell dictionary [{}]", e, locale);
            throw e;
        } finally {
            // best-effort close of all opened streams; failures here are not actionable
            if (affixStream != null) {
                try {
                    affixStream.close();
                } catch (IOException e) {
                    // nothing much we can do here
                }
            }
            for (InputStream in : dicStreams) {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // nothing much we can do here
                    }
                }
            }
        }
    }

    /**
     * Each hunspell dictionary directory may contain a {@code settings.yml} (or {@code settings.json})
     * file which holds dictionary specific settings. Default values for these settings are defined
     * in the given default settings.
     *
     * @param dir      The directory of the dictionary
     * @param defaults The default settings for this dictionary
     * @return The resolved settings.
     */
    private static Settings loadDictionarySettings(File dir, Settings defaults) {
        // NOTE(review): put(defaults) is applied AFTER loading the file, so keys present in
        // the defaults (node-level per-dictionary settings) appear to win over the settings
        // file — verify this precedence is intended.
        File file = new File(dir, "settings.yml");
        if (file.exists()) {
            try {
                return ImmutableSettings.settingsBuilder().loadFromUrl(file.toURI().toURL()).put(defaults).build();
            } catch (MalformedURLException e) {
                throw new ElasticSearchException(String.format("Could not load hunspell dictionary settings from [%s]", file.getAbsolutePath()), e);
            }
        }

        file = new File(dir, "settings.json");
        if (file.exists()) {
            try {
                return ImmutableSettings.settingsBuilder().loadFromUrl(file.toURI().toURL()).put(defaults).build();
            } catch (MalformedURLException e) {
                throw new ElasticSearchException(String.format("Could not load hunspell dictionary settings from [%s]", file.getAbsolutePath()), e);
            }
        }

        return defaults;
    }

    /**
     * Only accepts {@code *.dic} files
     */
    static class DictionaryFileFilter implements FilenameFilter {
        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(".dic");
        }
    }

    /**
     * Only accepts {@code *.aff} files
     */
    static class AffixFileFilter implements FilenameFilter {
        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(".aff");
        }
    }
}

View File

@ -19,12 +19,29 @@
package org.elasticsearch.indices.analysis;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.common.inject.multibindings.MapBinder;
import java.util.Map;
public class IndicesAnalysisModule extends AbstractModule {

    // Dictionaries registered programmatically (e.g. by plugins), keyed by language name.
    private final Map<String, HunspellDictionary> hunspellDictionaries = Maps.newHashMap();

    /**
     * Registers a hunspell dictionary under the given language name so that it becomes
     * part of the bound {@code Map<String, HunspellDictionary>}.
     */
    public void addHunspellDictionary(String lang, HunspellDictionary dictionary) {
        hunspellDictionaries.put(lang, dictionary);
    }

    @Override
    protected void configure() {
        bind(IndicesAnalysisService.class).asEagerSingleton();

        // Expose every registered dictionary through a multibound map so that
        // HunspellService can receive them via injection.
        MapBinder<String, HunspellDictionary> dictionaryBindings =
                MapBinder.newMapBinder(binder(), String.class, HunspellDictionary.class);
        for (Map.Entry<String, HunspellDictionary> dictionary : hunspellDictionaries.entrySet()) {
            dictionaryBindings.addBinding(dictionary.getKey()).toInstance(dictionary.getValue());
        }

        bind(HunspellService.class).asEagerSingleton();
    }
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.indices.analyze;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.HunspellService;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.internal.InternalNode;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
/**
*
*/
public class HunspellServiceTests extends AbstractNodesTests {

    @AfterClass
    public void closeNodes() {
        closeAllNodes();
    }

    @Test
    public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
        Settings nodeSettings = ImmutableSettings.settingsBuilder()
                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
                .put("indices.analysis.hunspell.dictionary.lazy", true)
                .put("indices.analysis.hunspell.dictionary.ignore_case", true)
                .build();

        Node node = startNode("node1", nodeSettings);

        // the en_US dictionary should pick up the node-level ignore_case setting
        HunspellDictionary dictionary = lookupDictionary(node, "en_US");
        assertThat(dictionary, notNullValue());
        assertThat(dictionary.getVersion(), equalTo(expectedVersion(nodeSettings)));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
    }

    @Test
    public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
        Settings nodeSettings = ImmutableSettings.settingsBuilder()
                .put("path.conf", getClass().getResource("/indices/analyze/conf_dir").getFile())
                .put("indices.analysis.hunspell.dictionary.lazy", true)
                .put("indices.analysis.hunspell.dictionary.ignore_case", true)
                .put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
                .put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
                .build();

        Node node = startNode("node1", nodeSettings);
        Version version = expectedVersion(nodeSettings);

        // locale-specific node settings override the node-level defaults
        HunspellDictionary dictionary = lookupDictionary(node, "en_US");
        assertThat(dictionary, notNullValue());
        assertThat(dictionary.getVersion(), equalTo(version));
        assertThat(dictionary.isIgnoreCase(), equalTo(false));

        // testing that dictionary specific settings override node level settings
        dictionary = lookupDictionary(node, "en_US_custom");
        assertThat(dictionary, notNullValue());
        assertThat(dictionary.getVersion(), equalTo(version));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
    }

    // Fetches a dictionary from the HunspellService bound inside the given node.
    private HunspellDictionary lookupDictionary(Node node, String locale) {
        return ((InternalNode) node).injector().getInstance(HunspellService.class).getDictionary(locale);
    }

    // The lucene version the service is expected to have parsed from the settings.
    private Version expectedVersion(Settings settings) {
        return Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
    }
}

View File

@ -0,0 +1,201 @@
SET ISO8859-1
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
NOSUGGEST !
# ordinal numbers
COMPOUNDMIN 1
# only in compounds: 1th, 2th, 3th
ONLYINCOMPOUND c
# compound rules:
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp
WORDCHARS 0123456789
PFX A Y 1
PFX A 0 re .
PFX I Y 1
PFX I 0 in .
PFX U Y 1
PFX U 0 un .
PFX C Y 1
PFX C 0 de .
PFX E Y 1
PFX E 0 dis .
PFX F Y 1
PFX F 0 con .
PFX K Y 1
PFX K 0 pro .
SFX V N 2
SFX V e ive e
SFX V 0 ive [^e]
SFX N Y 3
SFX N e ion e
SFX N y ication y
SFX N 0 en [^ey]
SFX X Y 3
SFX X e ions e
SFX X y ications y
SFX X 0 ens [^ey]
SFX H N 2
SFX H y ieth y
SFX H 0 th [^y]
SFX Y Y 1
SFX Y 0 ly .
SFX G Y 2
SFX G e ing e
SFX G 0 ing [^e]
SFX J Y 2
SFX J e ings e
SFX J 0 ings [^e]
SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y
SFX T N 4
SFX T 0 st e
SFX T y iest [^aeiou]y
SFX T 0 est [aeiou]y
SFX T 0 est [^ey]
SFX R Y 4
SFX R 0 r e
SFX R y ier [^aeiou]y
SFX R 0 er [aeiou]y
SFX R 0 er [^ey]
SFX Z Y 4
SFX Z 0 rs e
SFX Z y iers [^aeiou]y
SFX Z 0 ers [aeiou]y
SFX Z 0 ers [^ey]
SFX S Y 4
SFX S y ies [^aeiou]y
SFX S 0 s [aeiou]y
SFX S 0 es [sxzh]
SFX S 0 s [^sxzhy]
SFX P Y 3
SFX P y iness [^aeiou]y
SFX P 0 ness [aeiou]y
SFX P 0 ness [^y]
SFX M Y 1
SFX M 0 's .
SFX B Y 3
SFX B 0 able [^aeiou]
SFX B 0 able ee
SFX B e able [^aeiou]e
SFX L Y 1
SFX L 0 ment .
REP 88
REP a ei
REP ei a
REP a ey
REP ey a
REP ai ie
REP ie ai
REP are air
REP are ear
REP are eir
REP air are
REP air ere
REP ere air
REP ere ear
REP ere eir
REP ear are
REP ear air
REP ear ere
REP eir are
REP eir ere
REP ch te
REP te ch
REP ch ti
REP ti ch
REP ch tu
REP tu ch
REP ch s
REP s ch
REP ch k
REP k ch
REP f ph
REP ph f
REP gh f
REP f gh
REP i igh
REP igh i
REP i uy
REP uy i
REP i ee
REP ee i
REP j di
REP di j
REP j gg
REP gg j
REP j ge
REP ge j
REP s ti
REP ti s
REP s ci
REP ci s
REP k cc
REP cc k
REP k qu
REP qu k
REP kw qu
REP o eau
REP eau o
REP o ew
REP ew o
REP oo ew
REP ew oo
REP ew ui
REP ui ew
REP oo ui
REP ui oo
REP ew u
REP u ew
REP oo u
REP u oo
REP u oe
REP oe u
REP u ieu
REP ieu u
REP ue ew
REP ew ue
REP uff ough
REP oo ieu
REP ieu oo
REP ier ear
REP ear ier
REP ear air
REP air ear
REP w qu
REP qu w
REP z ss
REP ss z
REP shun tion
REP shun sion
REP shun cion

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,201 @@
SET ISO8859-1
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
NOSUGGEST !
# ordinal numbers
COMPOUNDMIN 1
# only in compounds: 1th, 2th, 3th
ONLYINCOMPOUND c
# compound rules:
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp
WORDCHARS 0123456789
PFX A Y 1
PFX A 0 re .
PFX I Y 1
PFX I 0 in .
PFX U Y 1
PFX U 0 un .
PFX C Y 1
PFX C 0 de .
PFX E Y 1
PFX E 0 dis .
PFX F Y 1
PFX F 0 con .
PFX K Y 1
PFX K 0 pro .
SFX V N 2
SFX V e ive e
SFX V 0 ive [^e]
SFX N Y 3
SFX N e ion e
SFX N y ication y
SFX N 0 en [^ey]
SFX X Y 3
SFX X e ions e
SFX X y ications y
SFX X 0 ens [^ey]
SFX H N 2
SFX H y ieth y
SFX H 0 th [^y]
SFX Y Y 1
SFX Y 0 ly .
SFX G Y 2
SFX G e ing e
SFX G 0 ing [^e]
SFX J Y 2
SFX J e ings e
SFX J 0 ings [^e]
SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y
SFX T N 4
SFX T 0 st e
SFX T y iest [^aeiou]y
SFX T 0 est [aeiou]y
SFX T 0 est [^ey]
SFX R Y 4
SFX R 0 r e
SFX R y ier [^aeiou]y
SFX R 0 er [aeiou]y
SFX R 0 er [^ey]
SFX Z Y 4
SFX Z 0 rs e
SFX Z y iers [^aeiou]y
SFX Z 0 ers [aeiou]y
SFX Z 0 ers [^ey]
SFX S Y 4
SFX S y ies [^aeiou]y
SFX S 0 s [aeiou]y
SFX S 0 es [sxzh]
SFX S 0 s [^sxzhy]
SFX P Y 3
SFX P y iness [^aeiou]y
SFX P 0 ness [aeiou]y
SFX P 0 ness [^y]
SFX M Y 1
SFX M 0 's .
SFX B Y 3
SFX B 0 able [^aeiou]
SFX B 0 able ee
SFX B e able [^aeiou]e
SFX L Y 1
SFX L 0 ment .
REP 88
REP a ei
REP ei a
REP a ey
REP ey a
REP ai ie
REP ie ai
REP are air
REP are ear
REP are eir
REP air are
REP air ere
REP ere air
REP ere ear
REP ere eir
REP ear are
REP ear air
REP ear ere
REP eir are
REP eir ere
REP ch te
REP te ch
REP ch ti
REP ti ch
REP ch tu
REP tu ch
REP ch s
REP s ch
REP ch k
REP k ch
REP f ph
REP ph f
REP gh f
REP f gh
REP i igh
REP igh i
REP i uy
REP uy i
REP i ee
REP ee i
REP j di
REP di j
REP j gg
REP gg j
REP j ge
REP ge j
REP s ti
REP ti s
REP s ci
REP ci s
REP k cc
REP cc k
REP k qu
REP qu k
REP kw qu
REP o eau
REP eau o
REP o ew
REP ew o
REP oo ew
REP ew oo
REP ew ui
REP ui ew
REP oo ui
REP ui oo
REP ew u
REP u ew
REP oo u
REP u oo
REP u oe
REP oe u
REP u ieu
REP ieu u
REP ue ew
REP ew ue
REP uff ough
REP oo ieu
REP ieu oo
REP ier ear
REP ear ier
REP ear air
REP air ear
REP w qu
REP qu w
REP z ss
REP ss z
REP shun tion
REP shun sion
REP shun cion

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
ignore_case: true
strict_affix_parsing: true