diff --git a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index d02ac2ae2be..b63ad561a5a 100644 --- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -33,6 +33,7 @@ import org.apache.commons.codec.language.bm.PhoneticEngine; import org.apache.commons.codec.language.bm.RuleType; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; +import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.PhoneticFilter; import org.elasticsearch.common.settings.Settings; @@ -53,6 +54,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { private List languageset; private NameType nametype; private RuleType ruletype; + private boolean isDaitchMokotoff; public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); @@ -60,6 +62,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { this.nametype = null; this.ruletype = null; this.maxcodelength = 0; + this.isDaitchMokotoff = false; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); @@ -106,7 +109,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { - this.encoder = new DaitchMokotoffSoundex(); + this.encoder = null; + this.isDaitchMokotoff = true; } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } @@ -115,6 +119,9 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { @Override public TokenStream create(TokenStream tokenStream) { if (encoder == null) { + if (isDaitchMokotoff) { + return new DaitchMokotoffSoundexFilter(tokenStream, !replace); + } if (ruletype != null && nametype != null) { LanguageSet langset = null; if (languageset != null && languageset.size() > 0) { diff --git a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index e3877faee31..7fad525b33c 100644 --- a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -72,4 +73,14 @@ public class SimplePhoneticAnalysisTests extends ESTestCase { "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult" }; BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); } + + public void testPhoneticTokenFilterDaitchMotokoff() throws IOException { + TokenFilterFactory filterFactory = analysis.tokenFilter.get("daitch_mokotoff"); + Tokenizer tokenizer = new WhitespaceTokenizer(); + tokenizer.setReader(new StringReader("chauptman")); + String[] expected = new String[] { "473660", "573660" }; + assertThat(filterFactory.create(tokenizer), instanceOf(DaitchMokotoffSoundexFilter.class)); + BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); + } + }