Merge branch 'pr/phonetic_daitch_mokotoff'
This commit is contained in:
commit
33b0e662cc
|
@ -42,7 +42,7 @@ The `phonetic` token filter takes the following settings:
|
||||||
Which phonetic encoder to use. Accepts `metaphone` (default),
|
Which phonetic encoder to use. Accepts `metaphone` (default),
|
||||||
`doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`,
|
`doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`,
|
||||||
`caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`,
|
`caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`,
|
||||||
`beidermorse`.
|
`beidermorse`, `daitch_mokotoff`.
|
||||||
|
|
||||||
`replace`::
|
`replace`::
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
|
||||||
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
|
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
|
||||||
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
|
import org.apache.lucene.analysis.phonetic.PhoneticFilter;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
@ -105,6 +104,8 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
this.encoder = new HaasePhonetik();
|
this.encoder = new HaasePhonetik();
|
||||||
} else if ("nysiis".equalsIgnoreCase(encodername)) {
|
} else if ("nysiis".equalsIgnoreCase(encodername)) {
|
||||||
this.encoder = new Nysiis();
|
this.encoder = new Nysiis();
|
||||||
|
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
|
||||||
|
this.encoder = new DaitchMokotoffSoundex();
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
|
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,3 +28,6 @@ index:
|
||||||
nysiisfilter:
|
nysiisfilter:
|
||||||
type: phonetic
|
type: phonetic
|
||||||
encoder: nysiis
|
encoder: nysiis
|
||||||
|
daitch_mokotoff:
|
||||||
|
type: phonetic
|
||||||
|
encoder: daitch_mokotoff
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
# Integration tests for Phonetic analysis components
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
"Daitch Mokotoff":
|
||||||
|
- do:
|
||||||
|
indices.create:
|
||||||
|
index: phonetic_sample
|
||||||
|
body:
|
||||||
|
settings:
|
||||||
|
index:
|
||||||
|
analysis:
|
||||||
|
analyzer:
|
||||||
|
my_analyzer:
|
||||||
|
tokenizer: standard
|
||||||
|
filter: ["standard", "lowercase", "daitch_mokotoff"]
|
||||||
|
filter:
|
||||||
|
daitch_mokotoff:
|
||||||
|
type: phonetic
|
||||||
|
encoder: daitch_mokotoff
|
||||||
|
- do:
|
||||||
|
cluster.health:
|
||||||
|
wait_for_status: yellow
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
index: phonetic_sample
|
||||||
|
analyzer: my_analyzer
|
||||||
|
text: Moskowitz
|
||||||
|
|
||||||
|
- length: { tokens: 1 }
|
||||||
|
- match: { tokens.0.token: "645740" }
|
||||||
|
|
Loading…
Reference in New Issue