From 52bf3650131e405c28b92e70e0e440d1dda304cc Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 18 Nov 2015 15:41:19 +0100 Subject: [PATCH] Add support for `daitch_mokotoff` [Daitch Mokotoff](https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex) support has been added in Lucene 5. So we can now support it as well. --- docs/plugins/analysis-phonetic.asciidoc | 2 +- .../analysis/PhoneticTokenFilterFactory.java | 3 +- .../index/analysis/phonetic-1.yml | 3 ++ .../analysis_phonetic/50_daitch_mokotoff.yaml | 32 +++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 plugins/analysis-phonetic/src/test/resources/rest-api-spec/test/analysis_phonetic/50_daitch_mokotoff.yaml diff --git a/docs/plugins/analysis-phonetic.asciidoc b/docs/plugins/analysis-phonetic.asciidoc index b15bfb8bd78..29fe9b4c7d1 100644 --- a/docs/plugins/analysis-phonetic.asciidoc +++ b/docs/plugins/analysis-phonetic.asciidoc @@ -42,7 +42,7 @@ The `phonetic` token filter takes the following settings: Which phonetic encoder to use. Accepts `metaphone` (default), `doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`, `caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`, - `beidermorse`. + `beidermorse`, `daitch_mokotoff`. `replace`:: diff --git a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index e661a12db85..37f7e0cd214 100644 --- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -30,7 +30,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.PhoneticFilter; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -105,6 +104,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); + } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { + this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } diff --git a/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml b/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml index 41a4e3fc59f..6c0a0763881 100644 --- a/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml +++ b/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml @@ -28,3 +28,6 @@ index: nysiisfilter: type: phonetic encoder: nysiis + daitch_mokotoff: + type: phonetic + encoder: daitch_mokotoff diff --git a/plugins/analysis-phonetic/src/test/resources/rest-api-spec/test/analysis_phonetic/50_daitch_mokotoff.yaml b/plugins/analysis-phonetic/src/test/resources/rest-api-spec/test/analysis_phonetic/50_daitch_mokotoff.yaml new file mode 100644 index 00000000000..b95138f2646 --- /dev/null +++ b/plugins/analysis-phonetic/src/test/resources/rest-api-spec/test/analysis_phonetic/50_daitch_mokotoff.yaml @@ -0,0 +1,32 @@ +# Integration tests for Phonetic analysis components +# + + +"Daitch Mokotoff": + - do: + indices.create: + index: phonetic_sample + body: + settings: + index: + analysis: + analyzer: + my_analyzer: + tokenizer: standard + filter: ["standard", "lowercase", "daitch_mokotoff"] + filter: + daitch_mokotoff: + type: phonetic + encoder: daitch_mokotoff + - do: + cluster.health: + wait_for_status: yellow + - do: + indices.analyze: + index: phonetic_sample + analyzer: my_analyzer + text: Moskowitz + + - length: { tokens: 1 } + - match: { tokens.0.token: "645740" } +