Analysis: Add phonetic encoder called `bm` or `beider_morse`, closes #1552.
This commit is contained in:
parent
2b838b808e
commit
a488424404
2
pom.xml
2
pom.xml
|
@ -124,7 +124,7 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-codec</groupId>
|
<groupId>commons-codec</groupId>
|
||||||
<artifactId>commons-codec</artifactId>
|
<artifactId>commons-codec</artifactId>
|
||||||
<version>1.5</version>
|
<version>1.6</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,9 @@ package org.elasticsearch.index.analysis.phonetic;
|
||||||
|
|
||||||
import org.apache.commons.codec.Encoder;
|
import org.apache.commons.codec.Encoder;
|
||||||
import org.apache.commons.codec.language.*;
|
import org.apache.commons.codec.language.*;
|
||||||
|
import org.apache.commons.codec.language.bm.BeiderMorseEncoder;
|
||||||
|
import org.apache.commons.codec.language.bm.NameType;
|
||||||
|
import org.apache.commons.codec.language.bm.RuleType;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -67,6 +70,25 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
|
DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
|
||||||
doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen()));
|
doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen()));
|
||||||
this.encoder = doubleMetaphone;
|
this.encoder = doubleMetaphone;
|
||||||
|
} else if ("bm".equalsIgnoreCase(encoder) || "beider_morse".equalsIgnoreCase(encoder)) {
|
||||||
|
BeiderMorseEncoder bm = new BeiderMorseEncoder();
|
||||||
|
String ruleType = settings.get("rule_type", "approx");
|
||||||
|
if ("approx".equalsIgnoreCase(ruleType)) {
|
||||||
|
bm.setRuleType(RuleType.APPROX);
|
||||||
|
} else if ("exact".equalsIgnoreCase(ruleType)) {
|
||||||
|
bm.setRuleType(RuleType.EXACT);
|
||||||
|
} else {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
|
||||||
|
}
|
||||||
|
String nameType = settings.get("name_type", "generic");
|
||||||
|
if ("GENERIC".equalsIgnoreCase(nameType)) {
|
||||||
|
bm.setNameType(NameType.GENERIC);
|
||||||
|
} else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
|
||||||
|
bm.setNameType(NameType.ASHKENAZI);
|
||||||
|
} else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
|
||||||
|
bm.setNameType(NameType.SEPHARDIC);
|
||||||
|
}
|
||||||
|
this.encoder = bm;
|
||||||
} else {
|
} else {
|
||||||
throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encoder + "] for phonetic token filter");
|
throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encoder + "] for phonetic token filter");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue