Analysis: Add phonetic encoder called `bm` or `beider_morse`, closes #1552.
This commit is contained in:
parent
2b838b808e
commit
a488424404
2
pom.xml
2
pom.xml
|
@ -124,7 +124,7 @@
|
|||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>1.6</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
|
|
|
@ -21,6 +21,9 @@ package org.elasticsearch.index.analysis.phonetic;
|
|||
|
||||
import org.apache.commons.codec.Encoder;
|
||||
import org.apache.commons.codec.language.*;
|
||||
import org.apache.commons.codec.language.bm.BeiderMorseEncoder;
|
||||
import org.apache.commons.codec.language.bm.NameType;
|
||||
import org.apache.commons.codec.language.bm.RuleType;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
|
@ -67,6 +70,25 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
|
||||
doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen()));
|
||||
this.encoder = doubleMetaphone;
|
||||
} else if ("bm".equalsIgnoreCase(encoder) || "beider_morse".equalsIgnoreCase(encoder)) {
|
||||
BeiderMorseEncoder bm = new BeiderMorseEncoder();
|
||||
String ruleType = settings.get("rule_type", "approx");
|
||||
if ("approx".equalsIgnoreCase(ruleType)) {
|
||||
bm.setRuleType(RuleType.APPROX);
|
||||
} else if ("exact".equalsIgnoreCase(ruleType)) {
|
||||
bm.setRuleType(RuleType.EXACT);
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
|
||||
}
|
||||
String nameType = settings.get("name_type", "generic");
|
||||
if ("GENERIC".equalsIgnoreCase(nameType)) {
|
||||
bm.setNameType(NameType.GENERIC);
|
||||
} else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
|
||||
bm.setNameType(NameType.ASHKENAZI);
|
||||
} else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
|
||||
bm.setNameType(NameType.SEPHARDIC);
|
||||
}
|
||||
this.encoder = bm;
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encoder + "] for phonetic token filter");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue