Add support for `daitch_mokotoff`

[Daitch–Mokotoff Soundex](https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex) support was added in Lucene 5,
so the `phonetic` token filter can now offer it as the `daitch_mokotoff` encoder as well.
This commit is contained in:
David Pilato 2015-11-18 15:41:19 +01:00
parent b855e7d14e
commit 52bf365013
4 changed files with 38 additions and 2 deletions

View File

@ -42,7 +42,7 @@ The `phonetic` token filter takes the following settings:
Which phonetic encoder to use. Accepts `metaphone` (default),
`doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`,
`caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`,
`beidermorse`.
`beidermorse`, `daitch_mokotoff`.
`replace`::

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
@ -105,6 +104,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
this.encoder = new HaasePhonetik();
} else if ("nysiis".equalsIgnoreCase(encodername)) {
this.encoder = new Nysiis();
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
this.encoder = new DaitchMokotoffSoundex();
} else {
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
}

View File

@ -28,3 +28,6 @@ index:
nysiisfilter:
type: phonetic
encoder: nysiis
daitch_mokotoff:
type: phonetic
encoder: daitch_mokotoff

View File

@ -0,0 +1,32 @@
# Integration tests for Phonetic analysis components
#
# Verifies that the `phonetic` token filter accepts the `daitch_mokotoff`
# encoder and produces the expected code for a sample name.

"Daitch Mokotoff":
  # Create an index whose analyzer ends with a phonetic filter configured
  # to use the daitch_mokotoff encoder.
  - do:
      indices.create:
        index: phonetic_sample
        body:
          settings:
            index:
              analysis:
                analyzer:
                  my_analyzer:
                    tokenizer: standard
                    filter: ["standard", "lowercase", "daitch_mokotoff"]
                filter:
                  daitch_mokotoff:
                    type: phonetic
                    encoder: daitch_mokotoff

  # Wait for at least yellow cluster health before running the analyze call.
  - do:
      cluster.health:
        wait_for_status: yellow

  # Run the sample text through the analyzer defined above.
  - do:
      indices.analyze:
        index: phonetic_sample
        analyzer: my_analyzer
        text: Moskowitz

  # Exactly one token is expected, carrying the encoded value.
  - length: { tokens: 1 }
  - match: { tokens.0.token: "645740" }