Add support for `daitch_mokotoff`
Lucene 5 added support for [Daitch–Mokotoff Soundex](https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex), so the `phonetic` token filter can now offer it as the `daitch_mokotoff` encoder.
This commit is contained in:
parent
b855e7d14e
commit
52bf365013
|
@ -42,7 +42,7 @@ The `phonetic` token filter takes the following settings:
|
|||
Which phonetic encoder to use. Accepts `metaphone` (default),
|
||||
`doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`,
|
||||
`caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`,
|
||||
`beidermorse`.
|
||||
`beidermorse`, `daitch_mokotoff`.
|
||||
|
||||
`replace`::
|
||||
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
|
|||
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
|
||||
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
|
@ -105,6 +104,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
this.encoder = new HaasePhonetik();
|
||||
} else if ("nysiis".equalsIgnoreCase(encodername)) {
|
||||
this.encoder = new Nysiis();
|
||||
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
|
||||
this.encoder = new DaitchMokotoffSoundex();
|
||||
} else {
|
||||
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
|
||||
}
|
||||
|
|
|
@ -28,3 +28,6 @@ index:
|
|||
nysiisfilter:
|
||||
type: phonetic
|
||||
encoder: nysiis
|
||||
daitch_mokotoff:
|
||||
type: phonetic
|
||||
encoder: daitch_mokotoff
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
# Integration tests for Phonetic analysis components
|
||||
#
|
||||
|
||||
|
||||
"Daitch Mokotoff":
|
||||
- do:
|
||||
indices.create:
|
||||
index: phonetic_sample
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
analysis:
|
||||
analyzer:
|
||||
my_analyzer:
|
||||
tokenizer: standard
|
||||
filter: ["standard", "lowercase", "daitch_mokotoff"]
|
||||
filter:
|
||||
daitch_mokotoff:
|
||||
type: phonetic
|
||||
encoder: daitch_mokotoff
|
||||
- do:
|
||||
cluster.health:
|
||||
wait_for_status: yellow
|
||||
- do:
|
||||
indices.analyze:
|
||||
index: phonetic_sample
|
||||
analyzer: my_analyzer
|
||||
text: Moskowitz
|
||||
|
||||
- length: { tokens: 1 }
|
||||
- match: { tokens.0.token: "645740" }
|
||||
|
Loading…
Reference in New Issue