Analysis: Improve Hunspell error messages
The Hunspell service would throw a confusing error message if more than one affix file was present. This commit distinguishes between the two error cases: when there are no affix files and when there are too many affix files. It also implements lazy dictionary loading, which was used in the tests but not implemented. Closes #6850
This commit is contained in:
parent
74927adced
commit
6e70edb0a4
|
@ -5,10 +5,10 @@ Basic support for hunspell stemming. Hunspell dictionaries will be
|
||||||
picked up from a dedicated hunspell directory on the filesystem
|
picked up from a dedicated hunspell directory on the filesystem
|
||||||
(defaults to `<path.conf>/hunspell`). Each dictionary is expected to
|
(defaults to `<path.conf>/hunspell`). Each dictionary is expected to
|
||||||
have its own directory named after its associated locale (language).
|
have its own directory named after its associated locale (language).
|
||||||
This dictionary directory is expected to hold both the `*.aff` and `*.dic`
|
This dictionary directory is expected to hold a single `*.aff` and
|
||||||
files (all of which will automatically be picked up). For example,
|
one or more `*.dic` files (all of which will automatically be picked up).
|
||||||
assuming the default hunspell location is used, the following directory
|
For example, assuming the default hunspell location is used, the
|
||||||
layout will define the `en_US` dictionary:
|
following directory layout will define the `en_US` dictionary:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -25,7 +25,7 @@ _elasticsearch.yml_.
|
||||||
|
|
||||||
Each dictionary can be configured with one setting:
|
Each dictionary can be configured with one setting:
|
||||||
|
|
||||||
`ignore_case`::
|
`ignore_case`::
|
||||||
If true, dictionary matching will be case insensitive
|
If true, dictionary matching will be case insensitive
|
||||||
(defaults to `false`)
|
(defaults to `false`)
|
||||||
|
|
||||||
|
@ -67,20 +67,20 @@ settings:
|
||||||
|
|
||||||
The hunspell token filter accepts four options:
|
The hunspell token filter accepts four options:
|
||||||
|
|
||||||
`locale`::
|
`locale`::
|
||||||
A locale for this filter. If this is unset, the `lang` or
|
A locale for this filter. If this is unset, the `lang` or
|
||||||
`language` are used instead - so one of these has to be set.
|
`language` are used instead - so one of these has to be set.
|
||||||
|
|
||||||
`dictionary`::
|
`dictionary`::
|
||||||
The name of a dictionary. The path to your hunspell
|
The name of a dictionary. The path to your hunspell
|
||||||
dictionaries should be configured via
|
dictionaries should be configured via
|
||||||
`indices.analysis.hunspell.dictionary.location` before.
|
`indices.analysis.hunspell.dictionary.location` before.
|
||||||
|
|
||||||
`dedup`::
|
`dedup`::
|
||||||
If only unique terms should be returned, this needs to be
|
If only unique terms should be returned, this needs to be
|
||||||
set to `true`. Defaults to `true`.
|
set to `true`. Defaults to `true`.
|
||||||
|
|
||||||
`longest_only`::
|
`longest_only`::
|
||||||
If only the longest term should be returned, set this to `true`.
|
If only the longest term should be returned, set this to `true`.
|
||||||
Defaults to `false`: all possible stems are returned.
|
Defaults to `false`: all possible stems are returned.
|
||||||
|
|
||||||
|
@ -88,6 +88,16 @@ NOTE: As opposed to the snowball stemmers (which are algorithm based)
|
||||||
this is a dictionary lookup based stemmer and therefore the quality of
|
this is a dictionary lookup based stemmer and therefore the quality of
|
||||||
the stemming is determined by the quality of the dictionary.
|
the stemming is determined by the quality of the dictionary.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
==== Dictionary loading
|
||||||
|
|
||||||
|
By default, the configured (`indices.analysis.hunspell.dictionary.location`)
|
||||||
|
or default Hunspell directory (`config/hunspell/`) is checked for dictionaries
|
||||||
|
when the node starts up, and any dictionaries are automatically loaded.
|
||||||
|
|
||||||
|
Dictionary loading can be deferred until the dictionaries are actually used by setting
|
||||||
|
`indices.analysis.hunspell.dictionary.lazy` to `true` in the config file.
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
==== References
|
==== References
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
private final boolean longestOnly;
|
private final boolean longestOnly;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
|
public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
|
|
||||||
String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
|
String locale = settings.get("locale", settings.get("language", settings.get("lang", null)));
|
||||||
|
@ -64,9 +64,9 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
public boolean dedup() {
|
public boolean dedup() {
|
||||||
return dedup;
|
return dedup;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean longestOnly() {
|
public boolean longestOnly() {
|
||||||
return longestOnly;
|
return longestOnly;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,9 @@ public class HunspellService extends AbstractComponent {
|
||||||
|
|
||||||
private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
|
private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
|
||||||
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
|
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
|
||||||
|
public final static String HUNSPELL_LAZY_LOAD = "indices.analysis.hunspell.dictionary.lazy";
|
||||||
|
public final static String HUNSPELL_IGNORE_CASE = "indices.analysis.hunspell.dictionary.ignore_case";
|
||||||
|
public final static String HUNSPELL_LOCATION = "indices.analysis.hunspell.dictionary.location";
|
||||||
private final LoadingCache<String, Dictionary> dictionaries;
|
private final LoadingCache<String, Dictionary> dictionaries;
|
||||||
private final Map<String, Dictionary> knownDictionaries;
|
private final Map<String, Dictionary> knownDictionaries;
|
||||||
|
|
||||||
|
@ -82,7 +84,7 @@ public class HunspellService extends AbstractComponent {
|
||||||
super(settings);
|
super(settings);
|
||||||
this.knownDictionaries = knownDictionaries;
|
this.knownDictionaries = knownDictionaries;
|
||||||
this.hunspellDir = resolveHunspellDirectory(settings, env);
|
this.hunspellDir = resolveHunspellDirectory(settings, env);
|
||||||
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
|
this.defaultIgnoreCase = settings.getAsBoolean(HUNSPELL_IGNORE_CASE, false);
|
||||||
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, Dictionary>() {
|
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, Dictionary>() {
|
||||||
@Override
|
@Override
|
||||||
public Dictionary load(String locale) throws Exception {
|
public Dictionary load(String locale) throws Exception {
|
||||||
|
@ -93,7 +95,9 @@ public class HunspellService extends AbstractComponent {
|
||||||
return dictionary;
|
return dictionary;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
scanAndLoadDictionaries();
|
if (!settings.getAsBoolean(HUNSPELL_LAZY_LOAD, false)) {
|
||||||
|
scanAndLoadDictionaries();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -101,12 +105,12 @@ public class HunspellService extends AbstractComponent {
|
||||||
*
|
*
|
||||||
* @param locale The name of the locale
|
* @param locale The name of the locale
|
||||||
*/
|
*/
|
||||||
public Dictionary getDictionary(String locale) {
|
public Dictionary getDictionary(String locale) {
|
||||||
return dictionaries.getUnchecked(locale);
|
return dictionaries.getUnchecked(locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
private File resolveHunspellDirectory(Settings settings, Environment env) {
|
private File resolveHunspellDirectory(Settings settings, Environment env) {
|
||||||
String location = settings.get("indices.analysis.hunspell.dictionary.location", null);
|
String location = settings.get(HUNSPELL_LOCATION, null);
|
||||||
if (location != null) {
|
if (location != null) {
|
||||||
return new File(location);
|
return new File(location);
|
||||||
}
|
}
|
||||||
|
@ -120,7 +124,7 @@ public class HunspellService extends AbstractComponent {
|
||||||
if (hunspellDir.exists() && hunspellDir.isDirectory()) {
|
if (hunspellDir.exists() && hunspellDir.isDirectory()) {
|
||||||
for (File file : hunspellDir.listFiles()) {
|
for (File file : hunspellDir.listFiles()) {
|
||||||
if (file.isDirectory()) {
|
if (file.isDirectory()) {
|
||||||
if (file.list(AFFIX_FILE_FILTER).length > 0) { // just making sure it's indeed a dictionary dir
|
if (file.list(DIC_FILE_FILTER).length > 0) { // just making sure it's indeed a dictionary dir
|
||||||
dictionaries.getUnchecked(file.getName());
|
dictionaries.getUnchecked(file.getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -153,9 +157,12 @@ public class HunspellService extends AbstractComponent {
|
||||||
boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
|
boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
|
||||||
|
|
||||||
File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
|
File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
|
||||||
if (affixFiles.length != 1) {
|
if (affixFiles.length == 0) {
|
||||||
throw new ElasticsearchException(String.format(Locale.ROOT, "Missing affix file for hunspell dictionary [%s]", locale));
|
throw new ElasticsearchException(String.format(Locale.ROOT, "Missing affix file for hunspell dictionary [%s]", locale));
|
||||||
}
|
}
|
||||||
|
if (affixFiles.length != 1) {
|
||||||
|
throw new ElasticsearchException(String.format(Locale.ROOT, "Too many affix files exist for hunspell dictionary [%s]", locale));
|
||||||
|
}
|
||||||
InputStream affixStream = null;
|
InputStream affixStream = null;
|
||||||
|
|
||||||
File[] dicFiles = dicDir.listFiles(DIC_FILE_FILTER);
|
File[] dicFiles = dicDir.listFiles(DIC_FILE_FILTER);
|
||||||
|
|
|
@ -19,16 +19,20 @@
|
||||||
package org.elasticsearch.indices.analyze;
|
package org.elasticsearch.indices.analyze;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||||
|
import org.elasticsearch.ElasticsearchException;
|
||||||
|
import org.elasticsearch.ExceptionsHelper;
|
||||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.indices.analysis.HunspellService;
|
import org.elasticsearch.indices.analysis.HunspellService;
|
||||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||||
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
|
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
|
||||||
|
import org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
|
||||||
|
import org.hamcrest.Matchers;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
import static org.elasticsearch.test.ElasticsearchIntegrationTest.*;
|
import static org.elasticsearch.indices.analysis.HunspellService.*;
|
||||||
import static org.hamcrest.Matchers.notNullValue;
|
import static org.hamcrest.Matchers.notNullValue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -41,8 +45,8 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
|
||||||
public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
|
public void testLocaleDirectoryWithNodeLevelConfig() throws Exception {
|
||||||
Settings settings = ImmutableSettings.settingsBuilder()
|
Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
||||||
.put("indices.analysis.hunspell.dictionary.lazy", true)
|
.put(HUNSPELL_LAZY_LOAD, randomBoolean())
|
||||||
.put("indices.analysis.hunspell.dictionary.ignore_case", true)
|
.put(HUNSPELL_IGNORE_CASE, true)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
internalCluster().startNode(settings);
|
internalCluster().startNode(settings);
|
||||||
|
@ -55,8 +59,8 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
|
||||||
public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
|
public void testLocaleDirectoryWithLocaleSpecificConfig() throws Exception {
|
||||||
Settings settings = ImmutableSettings.settingsBuilder()
|
Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
||||||
.put("indices.analysis.hunspell.dictionary.lazy", true)
|
.put(HUNSPELL_LAZY_LOAD, randomBoolean())
|
||||||
.put("indices.analysis.hunspell.dictionary.ignore_case", true)
|
.put(HUNSPELL_IGNORE_CASE, true)
|
||||||
.put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
|
.put("indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing", false)
|
||||||
.put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
|
.put("indices.analysis.hunspell.dictionary.en_US.ignore_case", false)
|
||||||
.build();
|
.build();
|
||||||
|
@ -77,14 +81,51 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCustomizeLocaleDirectory() throws Exception {
|
public void testCustomizeLocaleDirectory() throws Exception {
|
||||||
Settings settings = ImmutableSettings.settingsBuilder()
|
Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
.put("indices.analysis.hunspell.dictionary.location", getResource("/indices/analyze/conf_dir/hunspell"))
|
.put(HUNSPELL_LOCATION, getResource("/indices/analyze/conf_dir/hunspell"))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
internalCluster().startNode(settings);
|
internalCluster().startNode(settings);
|
||||||
Dictionary dictionary = internalCluster().getInstance(HunspellService.class).getDictionary("en_US");
|
Dictionary dictionary = internalCluster().getInstance(HunspellService.class).getDictionary("en_US");
|
||||||
assertThat(dictionary, notNullValue());
|
assertThat(dictionary, notNullValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDicWithNoAff() throws Exception {
|
||||||
|
Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
|
.put("path.conf", getResource("/indices/analyze/no_aff_conf_dir"))
|
||||||
|
.put(HUNSPELL_LAZY_LOAD, randomBoolean())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Dictionary dictionary = null;
|
||||||
|
try {
|
||||||
|
internalCluster().startNode(settings);
|
||||||
|
dictionary = internalCluster().getInstance(HunspellService.class).getDictionary("en_US");
|
||||||
|
fail("Missing affix file didn't throw an error");
|
||||||
|
}
|
||||||
|
catch (Throwable t) {
|
||||||
|
assertNull(dictionary);
|
||||||
|
assertThat(ExceptionsHelper.unwrap(t, ElasticsearchException.class).toString(), Matchers.containsString("Missing affix file"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDicWithTwoAffs() throws Exception {
|
||||||
|
Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
|
.put("path.conf", getResource("/indices/analyze/two_aff_conf_dir"))
|
||||||
|
.put(HUNSPELL_LAZY_LOAD, randomBoolean())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Dictionary dictionary = null;
|
||||||
|
try {
|
||||||
|
internalCluster().startNode(settings);
|
||||||
|
dictionary = internalCluster().getInstance(HunspellService.class).getDictionary("en_US");
|
||||||
|
fail("Multiple affix files didn't throw an error");
|
||||||
|
} catch (Throwable t) {
|
||||||
|
assertNull(dictionary);
|
||||||
|
assertThat(ExceptionsHelper.unwrap(t, ElasticsearchException.class).toString(), Matchers.containsString("Too many affix files"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: open up a getter on Dictionary
|
// TODO: open up a getter on Dictionary
|
||||||
private void assertIgnoreCase(boolean expected, Dictionary dictionary) throws Exception {
|
private void assertIgnoreCase(boolean expected, Dictionary dictionary) throws Exception {
|
||||||
Field f = Dictionary.class.getDeclaredField("ignoreCase");
|
Field f = Dictionary.class.getDeclaredField("ignoreCase");
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,201 @@
|
||||||
|
SET ISO8859-1
|
||||||
|
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
|
||||||
|
NOSUGGEST !
|
||||||
|
|
||||||
|
# ordinal numbers
|
||||||
|
COMPOUNDMIN 1
|
||||||
|
# only in compounds: 1th, 2th, 3th
|
||||||
|
ONLYINCOMPOUND c
|
||||||
|
# compound rules:
|
||||||
|
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
|
||||||
|
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
|
||||||
|
COMPOUNDRULE 2
|
||||||
|
COMPOUNDRULE n*1t
|
||||||
|
COMPOUNDRULE n*mp
|
||||||
|
WORDCHARS 0123456789
|
||||||
|
|
||||||
|
PFX A Y 1
|
||||||
|
PFX A 0 re .
|
||||||
|
|
||||||
|
PFX I Y 1
|
||||||
|
PFX I 0 in .
|
||||||
|
|
||||||
|
PFX U Y 1
|
||||||
|
PFX U 0 un .
|
||||||
|
|
||||||
|
PFX C Y 1
|
||||||
|
PFX C 0 de .
|
||||||
|
|
||||||
|
PFX E Y 1
|
||||||
|
PFX E 0 dis .
|
||||||
|
|
||||||
|
PFX F Y 1
|
||||||
|
PFX F 0 con .
|
||||||
|
|
||||||
|
PFX K Y 1
|
||||||
|
PFX K 0 pro .
|
||||||
|
|
||||||
|
SFX V N 2
|
||||||
|
SFX V e ive e
|
||||||
|
SFX V 0 ive [^e]
|
||||||
|
|
||||||
|
SFX N Y 3
|
||||||
|
SFX N e ion e
|
||||||
|
SFX N y ication y
|
||||||
|
SFX N 0 en [^ey]
|
||||||
|
|
||||||
|
SFX X Y 3
|
||||||
|
SFX X e ions e
|
||||||
|
SFX X y ications y
|
||||||
|
SFX X 0 ens [^ey]
|
||||||
|
|
||||||
|
SFX H N 2
|
||||||
|
SFX H y ieth y
|
||||||
|
SFX H 0 th [^y]
|
||||||
|
|
||||||
|
SFX Y Y 1
|
||||||
|
SFX Y 0 ly .
|
||||||
|
|
||||||
|
SFX G Y 2
|
||||||
|
SFX G e ing e
|
||||||
|
SFX G 0 ing [^e]
|
||||||
|
|
||||||
|
SFX J Y 2
|
||||||
|
SFX J e ings e
|
||||||
|
SFX J 0 ings [^e]
|
||||||
|
|
||||||
|
SFX D Y 4
|
||||||
|
SFX D 0 d e
|
||||||
|
SFX D y ied [^aeiou]y
|
||||||
|
SFX D 0 ed [^ey]
|
||||||
|
SFX D 0 ed [aeiou]y
|
||||||
|
|
||||||
|
SFX T N 4
|
||||||
|
SFX T 0 st e
|
||||||
|
SFX T y iest [^aeiou]y
|
||||||
|
SFX T 0 est [aeiou]y
|
||||||
|
SFX T 0 est [^ey]
|
||||||
|
|
||||||
|
SFX R Y 4
|
||||||
|
SFX R 0 r e
|
||||||
|
SFX R y ier [^aeiou]y
|
||||||
|
SFX R 0 er [aeiou]y
|
||||||
|
SFX R 0 er [^ey]
|
||||||
|
|
||||||
|
SFX Z Y 4
|
||||||
|
SFX Z 0 rs e
|
||||||
|
SFX Z y iers [^aeiou]y
|
||||||
|
SFX Z 0 ers [aeiou]y
|
||||||
|
SFX Z 0 ers [^ey]
|
||||||
|
|
||||||
|
SFX S Y 4
|
||||||
|
SFX S y ies [^aeiou]y
|
||||||
|
SFX S 0 s [aeiou]y
|
||||||
|
SFX S 0 es [sxzh]
|
||||||
|
SFX S 0 s [^sxzhy]
|
||||||
|
|
||||||
|
SFX P Y 3
|
||||||
|
SFX P y iness [^aeiou]y
|
||||||
|
SFX P 0 ness [aeiou]y
|
||||||
|
SFX P 0 ness [^y]
|
||||||
|
|
||||||
|
SFX M Y 1
|
||||||
|
SFX M 0 's .
|
||||||
|
|
||||||
|
SFX B Y 3
|
||||||
|
SFX B 0 able [^aeiou]
|
||||||
|
SFX B 0 able ee
|
||||||
|
SFX B e able [^aeiou]e
|
||||||
|
|
||||||
|
SFX L Y 1
|
||||||
|
SFX L 0 ment .
|
||||||
|
|
||||||
|
REP 88
|
||||||
|
REP a ei
|
||||||
|
REP ei a
|
||||||
|
REP a ey
|
||||||
|
REP ey a
|
||||||
|
REP ai ie
|
||||||
|
REP ie ai
|
||||||
|
REP are air
|
||||||
|
REP are ear
|
||||||
|
REP are eir
|
||||||
|
REP air are
|
||||||
|
REP air ere
|
||||||
|
REP ere air
|
||||||
|
REP ere ear
|
||||||
|
REP ere eir
|
||||||
|
REP ear are
|
||||||
|
REP ear air
|
||||||
|
REP ear ere
|
||||||
|
REP eir are
|
||||||
|
REP eir ere
|
||||||
|
REP ch te
|
||||||
|
REP te ch
|
||||||
|
REP ch ti
|
||||||
|
REP ti ch
|
||||||
|
REP ch tu
|
||||||
|
REP tu ch
|
||||||
|
REP ch s
|
||||||
|
REP s ch
|
||||||
|
REP ch k
|
||||||
|
REP k ch
|
||||||
|
REP f ph
|
||||||
|
REP ph f
|
||||||
|
REP gh f
|
||||||
|
REP f gh
|
||||||
|
REP i igh
|
||||||
|
REP igh i
|
||||||
|
REP i uy
|
||||||
|
REP uy i
|
||||||
|
REP i ee
|
||||||
|
REP ee i
|
||||||
|
REP j di
|
||||||
|
REP di j
|
||||||
|
REP j gg
|
||||||
|
REP gg j
|
||||||
|
REP j ge
|
||||||
|
REP ge j
|
||||||
|
REP s ti
|
||||||
|
REP ti s
|
||||||
|
REP s ci
|
||||||
|
REP ci s
|
||||||
|
REP k cc
|
||||||
|
REP cc k
|
||||||
|
REP k qu
|
||||||
|
REP qu k
|
||||||
|
REP kw qu
|
||||||
|
REP o eau
|
||||||
|
REP eau o
|
||||||
|
REP o ew
|
||||||
|
REP ew o
|
||||||
|
REP oo ew
|
||||||
|
REP ew oo
|
||||||
|
REP ew ui
|
||||||
|
REP ui ew
|
||||||
|
REP oo ui
|
||||||
|
REP ui oo
|
||||||
|
REP ew u
|
||||||
|
REP u ew
|
||||||
|
REP oo u
|
||||||
|
REP u oo
|
||||||
|
REP u oe
|
||||||
|
REP oe u
|
||||||
|
REP u ieu
|
||||||
|
REP ieu u
|
||||||
|
REP ue ew
|
||||||
|
REP ew ue
|
||||||
|
REP uff ough
|
||||||
|
REP oo ieu
|
||||||
|
REP ieu oo
|
||||||
|
REP ier ear
|
||||||
|
REP ear ier
|
||||||
|
REP ear air
|
||||||
|
REP air ear
|
||||||
|
REP w qu
|
||||||
|
REP qu w
|
||||||
|
REP z ss
|
||||||
|
REP ss z
|
||||||
|
REP shun tion
|
||||||
|
REP shun sion
|
||||||
|
REP shun cion
|
|
@ -0,0 +1,201 @@
|
||||||
|
SET ISO8859-1
|
||||||
|
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
|
||||||
|
NOSUGGEST !
|
||||||
|
|
||||||
|
# ordinal numbers
|
||||||
|
COMPOUNDMIN 1
|
||||||
|
# only in compounds: 1th, 2th, 3th
|
||||||
|
ONLYINCOMPOUND c
|
||||||
|
# compound rules:
|
||||||
|
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
|
||||||
|
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
|
||||||
|
COMPOUNDRULE 2
|
||||||
|
COMPOUNDRULE n*1t
|
||||||
|
COMPOUNDRULE n*mp
|
||||||
|
WORDCHARS 0123456789
|
||||||
|
|
||||||
|
PFX A Y 1
|
||||||
|
PFX A 0 re .
|
||||||
|
|
||||||
|
PFX I Y 1
|
||||||
|
PFX I 0 in .
|
||||||
|
|
||||||
|
PFX U Y 1
|
||||||
|
PFX U 0 un .
|
||||||
|
|
||||||
|
PFX C Y 1
|
||||||
|
PFX C 0 de .
|
||||||
|
|
||||||
|
PFX E Y 1
|
||||||
|
PFX E 0 dis .
|
||||||
|
|
||||||
|
PFX F Y 1
|
||||||
|
PFX F 0 con .
|
||||||
|
|
||||||
|
PFX K Y 1
|
||||||
|
PFX K 0 pro .
|
||||||
|
|
||||||
|
SFX V N 2
|
||||||
|
SFX V e ive e
|
||||||
|
SFX V 0 ive [^e]
|
||||||
|
|
||||||
|
SFX N Y 3
|
||||||
|
SFX N e ion e
|
||||||
|
SFX N y ication y
|
||||||
|
SFX N 0 en [^ey]
|
||||||
|
|
||||||
|
SFX X Y 3
|
||||||
|
SFX X e ions e
|
||||||
|
SFX X y ications y
|
||||||
|
SFX X 0 ens [^ey]
|
||||||
|
|
||||||
|
SFX H N 2
|
||||||
|
SFX H y ieth y
|
||||||
|
SFX H 0 th [^y]
|
||||||
|
|
||||||
|
SFX Y Y 1
|
||||||
|
SFX Y 0 ly .
|
||||||
|
|
||||||
|
SFX G Y 2
|
||||||
|
SFX G e ing e
|
||||||
|
SFX G 0 ing [^e]
|
||||||
|
|
||||||
|
SFX J Y 2
|
||||||
|
SFX J e ings e
|
||||||
|
SFX J 0 ings [^e]
|
||||||
|
|
||||||
|
SFX D Y 4
|
||||||
|
SFX D 0 d e
|
||||||
|
SFX D y ied [^aeiou]y
|
||||||
|
SFX D 0 ed [^ey]
|
||||||
|
SFX D 0 ed [aeiou]y
|
||||||
|
|
||||||
|
SFX T N 4
|
||||||
|
SFX T 0 st e
|
||||||
|
SFX T y iest [^aeiou]y
|
||||||
|
SFX T 0 est [aeiou]y
|
||||||
|
SFX T 0 est [^ey]
|
||||||
|
|
||||||
|
SFX R Y 4
|
||||||
|
SFX R 0 r e
|
||||||
|
SFX R y ier [^aeiou]y
|
||||||
|
SFX R 0 er [aeiou]y
|
||||||
|
SFX R 0 er [^ey]
|
||||||
|
|
||||||
|
SFX Z Y 4
|
||||||
|
SFX Z 0 rs e
|
||||||
|
SFX Z y iers [^aeiou]y
|
||||||
|
SFX Z 0 ers [aeiou]y
|
||||||
|
SFX Z 0 ers [^ey]
|
||||||
|
|
||||||
|
SFX S Y 4
|
||||||
|
SFX S y ies [^aeiou]y
|
||||||
|
SFX S 0 s [aeiou]y
|
||||||
|
SFX S 0 es [sxzh]
|
||||||
|
SFX S 0 s [^sxzhy]
|
||||||
|
|
||||||
|
SFX P Y 3
|
||||||
|
SFX P y iness [^aeiou]y
|
||||||
|
SFX P 0 ness [aeiou]y
|
||||||
|
SFX P 0 ness [^y]
|
||||||
|
|
||||||
|
SFX M Y 1
|
||||||
|
SFX M 0 's .
|
||||||
|
|
||||||
|
SFX B Y 3
|
||||||
|
SFX B 0 able [^aeiou]
|
||||||
|
SFX B 0 able ee
|
||||||
|
SFX B e able [^aeiou]e
|
||||||
|
|
||||||
|
SFX L Y 1
|
||||||
|
SFX L 0 ment .
|
||||||
|
|
||||||
|
REP 88
|
||||||
|
REP a ei
|
||||||
|
REP ei a
|
||||||
|
REP a ey
|
||||||
|
REP ey a
|
||||||
|
REP ai ie
|
||||||
|
REP ie ai
|
||||||
|
REP are air
|
||||||
|
REP are ear
|
||||||
|
REP are eir
|
||||||
|
REP air are
|
||||||
|
REP air ere
|
||||||
|
REP ere air
|
||||||
|
REP ere ear
|
||||||
|
REP ere eir
|
||||||
|
REP ear are
|
||||||
|
REP ear air
|
||||||
|
REP ear ere
|
||||||
|
REP eir are
|
||||||
|
REP eir ere
|
||||||
|
REP ch te
|
||||||
|
REP te ch
|
||||||
|
REP ch ti
|
||||||
|
REP ti ch
|
||||||
|
REP ch tu
|
||||||
|
REP tu ch
|
||||||
|
REP ch s
|
||||||
|
REP s ch
|
||||||
|
REP ch k
|
||||||
|
REP k ch
|
||||||
|
REP f ph
|
||||||
|
REP ph f
|
||||||
|
REP gh f
|
||||||
|
REP f gh
|
||||||
|
REP i igh
|
||||||
|
REP igh i
|
||||||
|
REP i uy
|
||||||
|
REP uy i
|
||||||
|
REP i ee
|
||||||
|
REP ee i
|
||||||
|
REP j di
|
||||||
|
REP di j
|
||||||
|
REP j gg
|
||||||
|
REP gg j
|
||||||
|
REP j ge
|
||||||
|
REP ge j
|
||||||
|
REP s ti
|
||||||
|
REP ti s
|
||||||
|
REP s ci
|
||||||
|
REP ci s
|
||||||
|
REP k cc
|
||||||
|
REP cc k
|
||||||
|
REP k qu
|
||||||
|
REP qu k
|
||||||
|
REP kw qu
|
||||||
|
REP o eau
|
||||||
|
REP eau o
|
||||||
|
REP o ew
|
||||||
|
REP ew o
|
||||||
|
REP oo ew
|
||||||
|
REP ew oo
|
||||||
|
REP ew ui
|
||||||
|
REP ui ew
|
||||||
|
REP oo ui
|
||||||
|
REP ui oo
|
||||||
|
REP ew u
|
||||||
|
REP u ew
|
||||||
|
REP oo u
|
||||||
|
REP u oo
|
||||||
|
REP u oe
|
||||||
|
REP oe u
|
||||||
|
REP u ieu
|
||||||
|
REP ieu u
|
||||||
|
REP ue ew
|
||||||
|
REP ew ue
|
||||||
|
REP uff ough
|
||||||
|
REP oo ieu
|
||||||
|
REP ieu oo
|
||||||
|
REP ier ear
|
||||||
|
REP ear ier
|
||||||
|
REP ear air
|
||||||
|
REP air ear
|
||||||
|
REP w qu
|
||||||
|
REP qu w
|
||||||
|
REP z ss
|
||||||
|
REP ss z
|
||||||
|
REP shun tion
|
||||||
|
REP shun sion
|
||||||
|
REP shun cion
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue