migrate branch for analysis-phonetic

2015-06-05 13:12:21 +02:00 · 2015-06-05 13:12:21 +02:00 · 0d328b07bd
parent 1cb86507d4 8d9ff57bc3
commit 0d328b07bd
13 changed files with 1301 additions and 0 deletions
--- a/plugins/analysis-phonetic/README.md
+++ b/plugins/analysis-phonetic/README.md
@ -0,0 +1,93 @@
+Phonetic Analysis for Elasticsearch
+===================================
+
+The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch.
+
+In order to install the plugin, simply run: 
+
+```sh
+bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0
+```
+
+
+| elasticsearch |Phonetic Analysis Plugin|   Docs     |  
+|---------------|-----------------------|------------|
+| master        |  Build from source    | See below  |
+| es-1.x        |  Build from source    | [2.6.0-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x)  |
+| es-1.5        |  2.5.0                | [2.5.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.5.0/#version-250-for-elasticsearch-15)                  |
+| es-1.4        |  2.4.3                | [2.4.3](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.3/#version-243-for-elasticsearch-14)                  |
+| < 1.4.5       |  2.4.2                | [2.4.2](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14)                  |
+| < 1.4.3       |  2.4.1                | [2.4.1](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14)                  |
+| es-1.3        |  2.3.0                | [2.3.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch)  |
+| es-1.2        |  2.2.0                | [2.2.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch)  |
+| es-1.1        |  2.1.0                | [2.1.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch)  |
+| es-1.0        |  2.0.0                | [2.0.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.0.0/#phonetic-analysis-for-elasticsearch)  |
+| es-0.90       |  1.8.0                | [1.8.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v1.8.0/#phonetic-analysis-for-elasticsearch)  |
+
+To use a `SNAPSHOT` version, build the plugin with Maven and install it from the generated zip file:
+
+```bash
+mvn clean install
+plugin --install analysis-phonetic \
+       --url file:target/releases/elasticsearch-analysis-phonetic-X.X.X-SNAPSHOT.zip
+```
+
+## User guide
+
+The plugin provides a `phonetic` token filter that can be configured with different `encoder` types:
+`metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`,
+`caverphone1`, `caverphone2`, `cologne`, `nysiis`,
+`koelnerphonetik`, `haasephonetik`, `beidermorse`.
+
+The `replace` parameter (defaults to `true`) controls whether the processed token is
+replaced with the encoded one (`true`) or the encoded token is added alongside it (`false`).
+
+```js
+{
+    "index" : {
+        "analysis" : {
+            "analyzer" : {
+                "my_analyzer" : {
+                    "tokenizer" : "standard",
+                    "filter" : ["standard", "lowercase", "my_metaphone"]
+                }
+            },
+            "filter" : {
+                "my_metaphone" : {
+                    "type" : "phonetic",
+                    "encoder" : "metaphone",
+                    "replace" : false
+                }
+            }
+        }
+    }
+}
+```
+
+Note that `beidermorse` does not support the `replace` parameter.
+
+
+Questions
+---------
+
+If you have questions or comments, please use the [mailing list](https://groups.google.com/group/elasticsearch) instead
+of the GitHub issue tracker.
+
+License
+-------
+
+    This software is licensed under the Apache 2 license, quoted below.
+
+    Copyright 2009-2014 Elasticsearch <http://www.elasticsearch.org>
+
+    Licensed under the Apache License, Version 2.0 (the "License"); you may not
+    use this file except in compliance with the License. You may obtain a copy of
+    the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+    License for the specific language governing permissions and limitations under
+    the License.
--- a/plugins/analysis-phonetic/pom.xml
+++ b/plugins/analysis-phonetic/pom.xml
@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.elasticsearch.plugin</groupId>
+    <artifactId>elasticsearch-analysis-phonetic</artifactId>
+
+    <packaging>jar</packaging>
+    <name>Elasticsearch Phonetic Analysis plugin</name>
+    <description>The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch.</description>
+
+    <parent>
+        <groupId>org.elasticsearch</groupId>
+        <artifactId>elasticsearch-plugin</artifactId>
+        <version>2.0.0-SNAPSHOT</version>
+    </parent>
+
+    <properties>
+        <!-- You can add any specific project property here -->
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-phonetic</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
--- a/plugins/analysis-phonetic/src/main/assemblies/plugin.xml
+++ b/plugins/analysis-phonetic/src/main/assemblies/plugin.xml
@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<assembly>
+    <id>plugin</id>
+    <formats>
+        <format>zip</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <dependencySets>
+        <dependencySet>
+            <outputDirectory>/</outputDirectory>
+            <useProjectArtifact>true</useProjectArtifact>
+            <useTransitiveFiltering>true</useTransitiveFiltering>
+            <excludes>
+                <exclude>org.elasticsearch:elasticsearch</exclude>
+            </excludes>
+        </dependencySet>
+        <dependencySet>
+            <outputDirectory>/</outputDirectory>
+            <useProjectArtifact>true</useProjectArtifact>
+            <useTransitiveFiltering>true</useTransitiveFiltering>
+            <includes>
+                <include>org.apache.lucene:lucene-analyzers-phonetic</include>
+            </includes>
+        </dependencySet>
+    </dependencySets>
+</assembly>
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java
@ -0,0 +1,30 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+/**
+ * Analysis binder processor that registers the {@code phonetic} token filter,
+ * backed by {@link PhoneticTokenFilterFactory}.
+ */
+public class PhoneticAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
+
+    @Override
+    public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
+        tokenFiltersBindings.processTokenFilter("phonetic", PhoneticTokenFilterFactory.class);
+    }
+}
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java
@ -0,0 +1,131 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.apache.commons.codec.Encoder;
+import org.apache.commons.codec.language.*;
+import org.apache.commons.codec.language.bm.Languages.LanguageSet;
+import org.apache.commons.codec.language.bm.NameType;
+import org.apache.commons.codec.language.bm.PhoneticEngine;
+import org.apache.commons.codec.language.bm.RuleType;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
+import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
+import org.apache.lucene.analysis.phonetic.PhoneticFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.analysis.phonetic.HaasePhonetik;
+import org.elasticsearch.index.analysis.phonetic.KoelnerPhonetik;
+import org.elasticsearch.index.analysis.phonetic.Nysiis;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+/**
+ * Factory for the {@code phonetic} token filter.
+ * <p>
+ * The {@code encoder} setting (default {@code metaphone}) selects the phonetic algorithm.
+ * Most encoders are wrapped in a {@link PhoneticFilter}. Double metaphone and Beider-Morse
+ * use their dedicated Lucene filters instead, which is why {@link #encoder} is {@code null}
+ * for those two.
+ */
+public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
+
+    /** Encoder handed to {@link PhoneticFilter}; {@code null} for double metaphone and Beider-Morse. */
+    private final Encoder encoder;
+    /** {@code true} if the encoded token replaces the original, {@code false} if it is added to it. */
+    private final boolean replace;
+    /** Maximum code length for double metaphone; stays {@code 0} for every other encoder. */
+    private int maxcodelength;
+    /** Value of the {@code languageset} setting; only read for Beider-Morse. */
+    private String[] languageset;
+    /** Beider-Morse name type; {@code null} for every other encoder. */
+    private NameType nametype;
+    /** Beider-Morse rule type; {@code null} for every other encoder. */
+    private RuleType ruletype;
+
+    /**
+     * Reads the filter settings and selects the encoder.
+     *
+     * @throws IllegalArgumentException if {@code encoder}, or for Beider-Morse {@code rule_type}
+     *                                  or {@code name_type}, has an unsupported value
+     */
+    @Inject
+    public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettings, name, settings);
+        this.replace = settings.getAsBoolean("replace", true);
+        // Fall back to metaphone when no encoder is configured; the setting was observed to be
+        // missing at the last step of SimplePhoneticAnalysisTests.
+        String encodername = settings.get("encoder", "metaphone");
+        if ("metaphone".equalsIgnoreCase(encodername)) {
+            this.encoder = new Metaphone();
+        } else if ("soundex".equalsIgnoreCase(encodername)) {
+            this.encoder = new Soundex();
+        } else if ("caverphone1".equalsIgnoreCase(encodername)) {
+            this.encoder = new Caverphone1();
+        } else if ("caverphone2".equalsIgnoreCase(encodername)) {
+            this.encoder = new Caverphone2();
+        } else if ("caverphone".equalsIgnoreCase(encodername)) {
+            this.encoder = new Caverphone2();
+        } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) {
+            this.encoder = new RefinedSoundex();
+        } else if ("cologne".equalsIgnoreCase(encodername)) {
+            this.encoder = new ColognePhonetic();
+        } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) {
+            this.encoder = null;
+            this.maxcodelength = settings.getAsInt("max_code_len", 4);
+        } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) {
+            this.encoder = null;
+            this.languageset = settings.getAsArray("languageset");
+            String ruleType = settings.get("rule_type", "approx");
+            if ("approx".equalsIgnoreCase(ruleType)) {
+                ruletype = RuleType.APPROX;
+            } else if ("exact".equalsIgnoreCase(ruleType)) {
+                ruletype = RuleType.EXACT;
+            } else {
+                throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
+            }
+            String nameType = settings.get("name_type", "generic");
+            if ("GENERIC".equalsIgnoreCase(nameType)) {
+                nametype = NameType.GENERIC;
+            } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
+                nametype = NameType.ASHKENAZI;
+            } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
+                nametype = NameType.SEPHARDIC;
+            } else {
+                // Reject unknown values here, like rule_type, instead of failing later in
+                // create() with an unspecific "encoder error".
+                throw new IllegalArgumentException("No matching name type [" + nameType + "] for beider morse encoder");
+            }
+        } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
+            this.encoder = new KoelnerPhonetik();
+        } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
+            this.encoder = new HaasePhonetik();
+        } else if ("nysiis".equalsIgnoreCase(encodername)) {
+            this.encoder = new Nysiis();
+        } else {
+            throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
+        }
+    }
+
+    /**
+     * Wraps the given stream in the filter that matches the configured encoder.
+     *
+     * @param tokenStream the stream to filter
+     * @return a {@link PhoneticFilter}, {@link BeiderMorseFilter} or {@link DoubleMetaphoneFilter}
+     * @throws IllegalArgumentException if no filter can be built, e.g. double metaphone with a
+     *                                  non-positive {@code max_code_len}
+     */
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        if (encoder != null) {
+            return new PhoneticFilter(tokenStream, encoder, !replace);
+        }
+        // Both types are only set when the Beider-Morse encoder was selected.
+        if (ruletype != null && nametype != null) {
+            final PhoneticEngine engine = new PhoneticEngine(nametype, ruletype, true);
+            // An empty array carries no restriction, so it is handled like a missing setting.
+            if (languageset != null && languageset.length > 0) {
+                final LanguageSet languages = LanguageSet.from(new HashSet<String>(Arrays.asList(languageset)));
+                return new BeiderMorseFilter(tokenStream, engine, languages);
+            }
+            return new BeiderMorseFilter(tokenStream, engine);
+        }
+        if (maxcodelength > 0) {
+            return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
+        }
+        throw new IllegalArgumentException("encoder error");
+    }
+}
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java
@ -0,0 +1,71 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis.phonetic;
+
+/**
+ * Modified algorithm from the Matching Toolbox by Rainer Schnell,
+ * Java implementation by J&ouml;rg Reiher.
+ *
+ * The K&ouml;lner Phonetik was revised by Martin Haase (Institute of Linguistics,
+ * University of Cologne) and Kai Heitmann (Institute of Medical Statistics,
+ * Informatics and Epidemiology, Cologne) for use in name databases such as
+ * the administration system of a hospital.
+ * M. Haase und K. Heitmann. Die Erweiterte K&ouml;lner Phonetik. 526, 2000.
+ *
+ * Based on: Martin Wilz, Aspekte der Kodierung phonetischer &Auml;hnlichkeiten
+ * in deutschen Eigennamen, master's thesis.
+ * http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
+ * 
+ * @author <a href="mailto:joergprante@gmail.com">J&ouml;rg Prante</a>
+ */
+public class HaasePhonetik extends KoelnerPhonetik {
+
+    private final static String[] HAASE_VARIATIONS_PATTERNS = {"OWN", "RB", "WSK", "A$", "O$", "SCH",
+        "GLI", "EAU$", "^CH", "AUX", "EUX", "ILLE"};
+    private final static String[] HAASE_VARIATIONS_REPLACEMENTS = {"AUN", "RW", "RSK", "AR", "OW", "CH",
+        "LI", "O", "SCH", "O", "O", "I"};
+
+    /**
+     * Returns the Haase variation patterns used in place of the Postel ones.
+     *
+     * @return the pattern strings; the superclass compiles them as regular expressions
+     */
+    @Override
+    protected String[] getPatterns() {
+        return HAASE_VARIATIONS_PATTERNS;
+    }
+
+    /**
+     * Returns the replacements for the Haase variation patterns.
+     * 
+     * @return the replacement strings, one per entry of {@link #getPatterns()} in the same order
+     */
+    @Override
+    protected String[] getReplacements() {
+        return HAASE_VARIATIONS_REPLACEMENTS;
+    }
+
+    /**
+     * Returns the code character the superclass emits for a leading vowel.
+     *
+     * @return {@code '9'}
+     */
+    @Override
+    protected char getCode() {
+        return '9';
+    }
+}
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java
@ -0,0 +1,324 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis.phonetic;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.StringEncoder;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * K&ouml;lner Phonetik (Cologne phonetics) string encoder.
+ * <p>
+ * H.J. Postel, Die K&ouml;lner Phonetik. Ein Verfahren zu Identifizierung
+ * von Personennamen auf der Grundlage der Gestaltanalyse. IBM-Nachrichten 19 (1969), 925-931
+ * <p>
+ * Algorithm from the Matching Toolbox by Rainer Schnell,
+ * Java implementation by J&ouml;rg Reiher,
+ * with changes by J&ouml;rg Prante.
+ * <p>
+ * Subclasses can swap the variation patterns, their replacements and the code used for a
+ * leading vowel by overriding {@link #getPatterns()}, {@link #getReplacements()} and
+ * {@link #getCode()}. {@link #getPatterns()} is called from the constructor, so overrides
+ * must not depend on subclass instance state.
+ */
+public class KoelnerPhonetik implements StringEncoder {
+
+    private static final String[] POSTEL_VARIATIONS_PATTERNS = {"AUN", "OWN", "RB", "RW", "WSK", "RSK"};
+    private static final String[] POSTEL_VARIATIONS_REPLACEMENTS = {"OWN", "AUN", "RW", "RB", "RSK", "WSK"};
+    /** Compiled form of {@link #getPatterns()}, built once by {@link #init()}. */
+    private Pattern[] variationsPatterns;
+    /** When {@code true}, only the whole input is encoded and no word combinations are added. */
+    private boolean primary = false;
+    // Character classes consulted by substitute(); each field is named after its members.
+    private final Set<Character> csz = new HashSet<Character>(Arrays.asList(
+            'C', 'S', 'Z'));
+    private final Set<Character> ckq = new HashSet<Character>(Arrays.asList(
+            'C', 'K', 'Q'));
+    private final Set<Character> aouhkxq = new HashSet<Character>(Arrays.asList(
+            'A', 'O', 'U', 'H', 'K', 'X', 'Q'));
+    private final Set<Character> ahkloqrux = new HashSet<Character>(Arrays.asList(
+            'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'));
+
+    /**
+     * Creates an encoder that also encodes combinations of the words of the input.
+     */
+    public KoelnerPhonetik() {
+        init();
+    }
+
+    /**
+     * Creates an encoder.
+     *
+     * @param useOnlyPrimaryCode if {@code true}, only the input as a whole is encoded
+     */
+    public KoelnerPhonetik(boolean useOnlyPrimaryCode) {
+        this();
+        this.primary = useOnlyPrimaryCode;
+    }
+
+    /**
+     * Get variation patterns. Each entry is compiled as a regular expression and should match
+     * at least one character; empty matches are ignored.
+     *
+     * @return string array of variations
+     */
+    protected String[] getPatterns() {
+        return POSTEL_VARIATIONS_PATTERNS;
+    }
+
+    /**
+     * Get the replacements for the variation patterns.
+     *
+     * @return string array with one replacement per entry of {@link #getPatterns()}, same order
+     */
+    protected String[] getReplacements() {
+        return POSTEL_VARIATIONS_REPLACEMENTS;
+    }
+
+    /**
+     * Get the code emitted for a vowel at the start of a word.
+     *
+     * @return the code character
+     */
+    protected char getCode() {
+        return '0';
+    }
+
+    /**
+     * Compares two values phonetically. Both are converted with {@code toString()} and
+     * upper-cased before encoding.
+     *
+     * @param o1 first value
+     * @param o2 second value
+     * @return {@code 1} if any code of {@code o1} equals any code of {@code o2}, otherwise {@code 0}
+     */
+    public double getRelativeValue(Object o1, Object o2) {
+        String[] kopho1 = code(expandUmlauts(o1.toString().toUpperCase(Locale.GERMANY)));
+        String[] kopho2 = code(expandUmlauts(o2.toString().toUpperCase(Locale.GERMANY)));
+        for (String first : kopho1) {
+            for (String second : kopho2) {
+                if (first.equals(second)) {
+                    return 1;
+                }
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Encodes a string passed as {@code Object}.
+     *
+     * @param str the string to encode, may be {@code null}
+     * @return the encoded string, or {@code null} for {@code null} input
+     * @throws EncoderException if {@code str} is not a {@code String}
+     */
+    @Override
+    public Object encode(Object str) throws EncoderException {
+        if (str == null) {
+            return null;
+        }
+        if (!(str instanceof String)) {
+            // Report wrong input through the declared exception rather than a ClassCastException.
+            throw new EncoderException("Parameter supplied to KoelnerPhonetik encode is not of type java.lang.String");
+        }
+        return encode((String) str);
+    }
+
+    /**
+     * Encodes a string. The codes of all parts and variations are joined with {@code '_'}.
+     * <p>
+     * NOTE(review): the variation patterns are matched against the input as given and only the
+     * final substitution upper-cases it, whereas {@link #getRelativeValue(Object, Object)}
+     * upper-cases first. Confirm whether lower-case input should produce variations here too.
+     *
+     * @param str the string to encode, may be {@code null}
+     * @return the encoded string, or {@code null} for {@code null} input
+     */
+    @Override
+    public String encode(String str) throws EncoderException {
+        if (str == null) {
+            return null;
+        }
+        String[] codes = code(str);
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < codes.length; i++) {
+            if (i > 0) {
+                sb.append('_');
+            }
+            sb.append(codes[i]);
+        }
+        return sb.toString();
+    }
+
+    /** Compiles the variation patterns once so they can be reused for every input. */
+    private void init() {
+        String[] patterns = getPatterns();
+        this.variationsPatterns = new Pattern[patterns.length];
+        for (int i = 0; i < patterns.length; i++) {
+            this.variationsPatterns[i] = Pattern.compile(patterns[i]);
+        }
+    }
+
+    /** Computes one code per part/variation of the input. */
+    private String[] code(String str) {
+        List<String> parts = partition(str);
+        String[] codes = new String[parts.size()];
+        int i = 0;
+        for (String s : parts) {
+            codes[i++] = substitute(s);
+        }
+        return codes;
+    }
+
+    /**
+     * Splits the input into the strings to encode: the input reduced to letters and digits,
+     * plus (unless only the primary code is wanted) concatenations of consecutive words,
+     * each expanded into its spelling variations.
+     */
+    private List<String> partition(String str) {
+        List<String> parts = new ArrayList<String>();
+        // primary form: everything except letters and digits removed
+        parts.add(str.replaceAll("[^\\p{L}\\p{N}]", ""));
+        if (!primary) {
+            List<String> tmpParts = new ArrayList<String>(Arrays.asList(str.split("[\\p{Z}\\p{C}\\p{P}]")));
+            int numberOfParts = tmpParts.size();
+            while (tmpParts.size() > 0) {
+                StringBuilder part = new StringBuilder();
+                for (int i = 0; i < tmpParts.size(); i++) {
+                    part.append(tmpParts.get(i));
+                    // the concatenation of all words is skipped; the primary form stands in for it
+                    if (!(i + 1 == numberOfParts)) {
+                        parts.add(part.toString());
+                    }
+                }
+                tmpParts.remove(0);
+            }
+        }
+        List<String> variations = new ArrayList<String>();
+        for (String part : parts) {
+            variations.addAll(getVariations(part));
+        }
+        return variations;
+    }
+
+    /**
+     * Expands a string into its spelling variations: for every pattern match found from left to
+     * right, each variation collected so far is kept once with the matched text and once with
+     * the pattern's replacement.
+     */
+    private List<String> getVariations(String str) {
+        final String[] patterns = getPatterns();
+        final String[] replacements = getReplacements();
+        int position = 0;
+        List<String> variations = new ArrayList<String>();
+        variations.add("");
+        while (position < str.length()) {
+            int i = 0;
+            int substPos = -1;
+            int substEnd = -1;
+            // find the first pattern (in declaration order) with a match at or after position
+            while (substPos < position && i < patterns.length) {
+                Matcher m = variationsPatterns[i].matcher(str);
+                while (substPos < position && m.find()) {
+                    // empty matches are ignored so that position always advances
+                    if (m.end() > m.start()) {
+                        substPos = m.start();
+                        substEnd = m.end();
+                    }
+                }
+                i++;
+            }
+            if (substPos >= position) {
+                i--;
+                List<String> varNew = new ArrayList<String>(variations.size());
+                String prevPart = str.substring(position, substPos);
+                // Use the matched text and its real end, not the regex source and its length:
+                // for anchored patterns such as "^CH" or "A$" the two differ, which used to
+                // drop input characters and leak '^'/'$' into the variation.
+                String matched = str.substring(substPos, substEnd);
+                for (int ii = 0; ii < variations.size(); ii++) {
+                    String base = variations.get(ii) + prevPart;
+                    varNew.add(base + replacements[i]);
+                    variations.set(ii, base + matched);
+                }
+                variations.addAll(varNew);
+                position = substEnd;
+            } else {
+                // no further match: append the rest of the string to every variation
+                String rest = str.substring(position);
+                for (int ii = 0; ii < variations.size(); ii++) {
+                    variations.set(ii, variations.get(ii) + rest);
+                }
+                position = str.length();
+            }
+        }
+        return variations;
+    }
+
+    /**
+     * Translates one string into its digit code: upper-cases it, expands umlauts, collapses
+     * repeated characters, maps each character to its code and collapses repeated codes.
+     */
+    private String substitute(String str) {
+        String s = expandUmlauts(str.toUpperCase(Locale.GERMAN));
+        s = removeSequences(s);
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < s.length(); i++) {
+            char current = s.charAt(i);
+            // '_' stands for "no neighbour" at either end of the string
+            char next = i + 1 < s.length() ? s.charAt(i + 1) : '_';
+            char prev = i > 0 ? s.charAt(i - 1) : '_';
+            switch (current) {
+                case 'A':
+                case 'E':
+                case 'I':
+                case 'J':
+                case 'Y':
+                case 'O':
+                case 'U':
+                    // vowels are only coded at the start, or directly after a leading H
+                    if (i == 0 || ((i == 1) && prev == 'H')) {
+                        sb.append(getCode());
+                    }
+                    break;
+                case 'P':
+                    sb.append(next == 'H' ? "33" : "1");
+                    break;
+                case 'B':
+                    sb.append('1');
+                    break;
+                case 'D':
+                case 'T':
+                    sb.append(csz.contains(next) ? '8' : '2');
+                    break;
+                case 'F':
+                case 'V':
+                case 'W':
+                    sb.append('3');
+                    break;
+                case 'G':
+                case 'K':
+                case 'Q':
+                    sb.append('4');
+                    break;
+                case 'C':
+                    if (i == 0) {
+                        sb.append(ahkloqrux.contains(next) ? '4' : '8');
+                    } else {
+                        sb.append(aouhkxq.contains(next) ? '4' : '8');
+                    }
+                    // if the previously emitted code is 8, this C is coded 8 as well
+                    if (sb.length() >= 2 && sb.charAt(sb.length() - 2) == '8') {
+                        sb.setCharAt(sb.length() - 1, '8');
+                    }
+                    break;
+                case 'X':
+                    sb.append(i < 1 || !ckq.contains(prev) ? "48" : "8");
+                    break;
+                case 'L':
+                    sb.append('5');
+                    break;
+                case 'M':
+                case 'N':
+                    sb.append('6');
+                    break;
+                case 'R':
+                    sb.append('7');
+                    break;
+                case 'S':
+                case 'Z':
+                    sb.append('8');
+                    break;
+                case 'H':
+                    break;
+                default:
+                    // every other character carries no code
+                    break;
+            }
+        }
+        return removeSequences(sb.toString());
+    }
+
+    /**
+     * Replaces upper-case German umlauts with their two-letter transcription.
+     *
+     * @param str the upper-cased input
+     * @return the input with &Auml;, &Ouml; and &Uuml; written as AE, OE and UE
+     */
+    private String expandUmlauts(String str) {
+        return str.replace("\u00C4", "AE").replace("\u00D6", "OE").replace("\u00DC", "UE");
+    }
+
+    /**
+     * Collapses every run of identical consecutive characters into a single character.
+     *
+     * @param str the input, may be {@code null}
+     * @return the collapsed string; empty for {@code null} or empty input
+     */
+    private String removeSequences(String str) {
+        if (str == null || str.length() == 0) {
+            return "";
+        }
+        StringBuilder sb = new StringBuilder().append(str.charAt(0));
+        for (int i = 1; i < str.length(); i++) {
+            char c = str.charAt(i);
+            if (c != sb.charAt(sb.length() - 1)) {
+                sb.append(c);
+            }
+        }
+        return sb.toString();
+    }
+}
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java
@ -0,0 +1,329 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis.phonetic;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.StringEncoder;
+
+import java.util.regex.Pattern;
+
+/**
+ * Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate
+ * similar names, but can also be used as a general purpose scheme to find words
+ * with similar phonemes.
+ *
+ * <p>Taken from commons-codec trunk (unreleased yet).</p>
+ *
+ * <p>NYSIIS features an accuracy increase of 2.7% over the traditional Soundex
+ * algorithm.</p>
+ *
+ * <p>Algorithm description:</p>
+ * <pre>
+ * 1. Transcode first characters of name
+ *   1a. MAC -&gt;   MCC
+ *   1b. KN  -&gt;   NN
+ *   1c. K   -&gt;   C
+ *   1d. PH  -&gt;   FF
+ *   1e. PF  -&gt;   FF
+ *   1f. SCH -&gt;   SSS
+ * 2. Transcode last characters of name
+ *   2a. EE, IE          -&gt;   Y
+ *   2b. DT,RT,RD,NT,ND  -&gt;   D
+ * 3. First character of key = first character of name
+ * 4. Transcode remaining characters by following these rules, incrementing by one character each time
+ *   4a. EV  -&gt;   AF  else A,E,I,O,U -&gt; A
+ *   4b. Q   -&gt;   G
+ *   4c. Z   -&gt;   S
+ *   4d. M   -&gt;   N
+ *   4e. KN  -&gt;   N   else K -&gt; C
+ *   4f. SCH -&gt;   SSS
+ *   4g. PH  -&gt;   FF
+ *   4h. H   -&gt;   If previous or next is nonvowel, previous
+ *   4i. W   -&gt;   If previous is vowel, previous
+ *   4j. Add current to key if current != last key character
+ * 5. If last character is S, remove it
+ * 6. If last characters are AY, replace with Y
+ * 7. If last character is A, remove it
+ * 8. Collapse all strings of repeated characters
+ * 9. Add original first character of name as first character of key
+ * </pre>
+ *
+ * @see <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a>
+ * @see <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a>
+ */
+public class Nysiis implements StringEncoder {
+
+    private static final char[] CHARS_A = new char[]{'A'};
+    private static final char[] CHARS_AF = new char[]{'A', 'F'};
+    private static final char[] CHARS_C = new char[]{'C'};
+    private static final char[] CHARS_FF = new char[]{'F', 'F'};
+    private static final char[] CHARS_G = new char[]{'G'};
+    private static final char[] CHARS_N = new char[]{'N'};
+    private static final char[] CHARS_NN = new char[]{'N', 'N'};
+    private static final char[] CHARS_S = new char[]{'S'};
+    private static final char[] CHARS_SSS = new char[]{'S', 'S', 'S'};
+    private static final Pattern PAT_MAC = Pattern.compile("^MAC");
+    private static final Pattern PAT_KN = Pattern.compile("^KN");
+    private static final Pattern PAT_K = Pattern.compile("^K");
+    private static final Pattern PAT_PH_PF = Pattern.compile("^(PH|PF)");
+    private static final Pattern PAT_SCH = Pattern.compile("^SCH");
+    private static final Pattern PAT_EE_IE = Pattern.compile("(EE|IE)$");
+    private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$");
+    private static final char SPACE = ' ';
+    private static final int TRUE_LENGTH = 6;
+
+    /**
+     * Tests if the given character is a vowel.
+     *
+     * @param c the character to test
+     * @return {@code true} if the character is a vowel, {@code false} otherwise
+     */
+    private static boolean isVowel(final char c) {
+        return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U';
+    }
+
+    /**
+     * Transcodes the remaining parts of the String. The method operates on a
+     * sliding window, looking at 4 characters at a time: [i-1, i, i+1, i+2].
+     *
+     * @param prev the previous character
+     * @param curr the current character
+     * @param next the next character
+     * @param aNext the after next character
+     * @return the replacement characters starting at the current position; may be
+     * a shared constant array, so callers must not modify it
+     */
+    private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) {
+        // 1. EV -> AF
+        if (curr == 'E' && next == 'V') {
+            return CHARS_AF;
+        }
+
+        // A, E, I, O, U -> A
+        if (isVowel(curr)) {
+            return CHARS_A;
+        }
+
+        // 2. Q -> G, Z -> S, M -> N
+        if (curr == 'Q') {
+            return CHARS_G;
+        } else if (curr == 'Z') {
+            return CHARS_S;
+        } else if (curr == 'M') {
+            return CHARS_N;
+        }
+
+        // 3. KN -> NN else K -> C
+        if (curr == 'K') {
+            return next == 'N' ? CHARS_NN : CHARS_C;
+        }
+
+        // 4. SCH -> SSS
+        if (curr == 'S' && next == 'C' && aNext == 'H') {
+            return CHARS_SSS;
+        }
+
+        // PH -> FF
+        if (curr == 'P' && next == 'H') {
+            return CHARS_FF;
+        }
+
+        // 5. H -> If previous or next is a non vowel, previous.
+        if (curr == 'H' && (!isVowel(prev) || !isVowel(next))) {
+            return new char[]{prev};
+        }
+
+        // 6. W -> If previous is vowel, previous.
+        if (curr == 'W' && isVowel(prev)) {
+            return new char[]{prev};
+        }
+
+        return new char[]{curr};
+    }
+
+    /**
+     * Indicates the strict mode.
+     */
+    private final boolean strict;
+
+    /**
+     * Creates an instance of the {@link Nysiis} encoder with strict mode
+     * (original form), i.e. encoded strings have a maximum length of 6.
+     */
+    public Nysiis() {
+        this(true);
+    }
+
+    /**
+     * Create an instance of the {@link Nysiis} encoder with the specified
+     * strict mode:
+     *
+     * <ul> <li>{@code true}: encoded strings have a maximum length of 6</li> <li>{@code false}:
+     * encoded strings may have arbitrary length</li> </ul>
+     *
+     * @param strict the strict mode
+     */
+    public Nysiis(final boolean strict) {
+        this.strict = strict;
+    }
+
+    /**
+     * Encodes an Object using the NYSIIS algorithm. This method is provided in
+     * order to satisfy the requirements of the Encoder interface, and will
+     * throw an {@link EncoderException} if the supplied object is not of type
+     * {@link String}.
+     *
+     * @param obj Object to encode
+     * @return An object (or a {@link String}) containing the NYSIIS code which
+     * corresponds to the given String.
+     * @throws EncoderException if the parameter supplied is not a {@link String};
+     *                          this includes {@code null}
+     */
+    @Override
+    public Object encode(Object obj) throws EncoderException {
+        if (!(obj instanceof String)) {
+            throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String");
+        }
+        return this.nysiis((String) obj);
+    }
+
+    /**
+     * Encodes a String using the NYSIIS algorithm.
+     *
+     * @param str A String object to encode, may be {@code null}
+     * @return A Nysiis code corresponding to the String supplied, or
+     * {@code null} if {@code str} is {@code null}
+     */
+    @Override
+    public String encode(String str) {
+        return this.nysiis(str);
+    }
+
+    /**
+     * Indicates the strict mode for this {@link Nysiis} encoder.
+     *
+     * @return {@code true} if the encoder is configured for strict mode, {@code false}
+     * otherwise
+     */
+    public boolean isStrict() {
+        return this.strict;
+    }
+
+    /**
+     * Retrieves the NYSIIS code for a given String object.
+     *
+     * @param str String to encode using the NYSIIS algorithm, may be {@code null}
+     * @return the NYSIIS code; {@code null} for {@code null} input, empty if {@code str} contains no letters
+     */
+    public String nysiis(String str) {
+        if (str == null) {
+            return null;
+        }
+
+        // Use the same clean rules as Soundex
+        str = clean(str);
+
+        if (str.length() == 0) {
+            return str;
+        }
+
+        // Translate first characters of name:
+        // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
+        str = PAT_MAC.matcher(str).replaceFirst("MCC");
+        str = PAT_KN.matcher(str).replaceFirst("NN");
+        str = PAT_K.matcher(str).replaceFirst("C");
+        str = PAT_PH_PF.matcher(str).replaceFirst("FF");
+        str = PAT_SCH.matcher(str).replaceFirst("SSS");
+
+        // Translate last characters of name:
+        // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
+        str = PAT_EE_IE.matcher(str).replaceFirst("Y");
+        str = PAT_DT_ETC.matcher(str).replaceFirst("D");
+
+        // First character of key = first character of name.
+        final StringBuilder key = new StringBuilder(str.length());
+        key.append(str.charAt(0));
+
+        // Transcode remaining characters, incrementing by one character each time
+        final char[] chars = str.toCharArray();
+        final int len = chars.length;
+
+        for (int i = 1; i < len; i++) {
+            final char next = i < len - 1 ? chars[i + 1] : SPACE;
+            final char aNext = i < len - 2 ? chars[i + 2] : SPACE;
+            final char[] transcoded = transcodeRemaining(chars[i - 1], chars[i], next, aNext);
+            System.arraycopy(transcoded, 0, chars, i, transcoded.length);
+
+            // only append the current char to the key if it is different from the last one
+            if (chars[i] != chars[i - 1]) {
+                key.append(chars[i]);
+            }
+        }
+
+        if (key.length() > 1) {
+            char lastChar = key.charAt(key.length() - 1);
+
+            // If last character is S, remove it.
+            if (lastChar == 'S') {
+                key.deleteCharAt(key.length() - 1);
+                lastChar = key.charAt(key.length() - 1);
+            }
+
+            if (key.length() > 2) {
+                final char last2Char = key.charAt(key.length() - 2);
+                // If last characters are AY, replace with Y.
+                if (last2Char == 'A' && lastChar == 'Y') {
+                    key.deleteCharAt(key.length() - 2);
+                }
+            }
+
+            // If last character is A, remove it.
+            if (lastChar == 'A') {
+                key.deleteCharAt(key.length() - 1);
+            }
+        }
+
+        final String string = key.toString();
+        return this.isStrict() ? string.substring(0, Math.min(TRUE_LENGTH, string.length())) : string;
+    }
+
+    /**
+     * Drops every character that is not a letter and upper-cases the remainder.
+     * @param str the text to clean, may be {@code null}
+     * @return the cleaned text; {@code str} itself if it is {@code null} or empty
+     */
+    static String clean(String str) {
+        if (str == null || str.length() == 0) {
+            return str;
+        }
+        int len = str.length();
+        char[] chars = new char[len];
+        int count = 0;
+        for (int i = 0; i < len; i++) {
+            if (Character.isLetter(str.charAt(i))) {
+                chars[count++] = str.charAt(i);
+            }
+        }
+        if (count == len) {
+            return str.toUpperCase(java.util.Locale.ENGLISH);
+        }
+        return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH);
+    }
+}
--- a/plugins/analysis-phonetic/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java
+++ b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java
@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.plugin.analysis;
+
+import org.elasticsearch.index.analysis.AnalysisModule;
+import org.elasticsearch.index.analysis.PhoneticAnalysisBinderProcessor;
+import org.elasticsearch.plugins.AbstractPlugin;
+
+/** Plugin named "analysis-phonetic" that adds a {@link PhoneticAnalysisBinderProcessor} to the {@link AnalysisModule}. */
+public class AnalysisPhoneticPlugin extends AbstractPlugin {
+
+    @Override
+    public String description() {
+        return "Phonetic analysis support";
+    }
+
+    @Override
+    public String name() {
+        return "analysis-phonetic";
+    }
+
+    public void onModule(AnalysisModule module) {
+        final PhoneticAnalysisBinderProcessor processor = new PhoneticAnalysisBinderProcessor();
+        module.addProcessor(processor);
+    }
+}
+
--- a/plugins/analysis-phonetic/src/main/resources/es-plugin.properties
+++ b/plugins/analysis-phonetic/src/main/resources/es-plugin.properties
@ -0,0 +1,3 @@
+plugin=org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin
+version=${project.version}
+lucene=${lucene.version}
--- a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java
+++ b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java
@ -0,0 +1,72 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.inject.Injector;
+import org.elasticsearch.common.inject.ModulesBuilder;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.settings.SettingsModule;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.env.EnvironmentModule;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.IndexNameModule;
+import org.elasticsearch.index.settings.IndexSettingsModule;
+import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
+import org.elasticsearch.indices.analysis.IndicesAnalysisService;
+import org.elasticsearch.test.ElasticsearchTestCase;
+import org.hamcrest.MatcherAssert;
+import org.junit.Test;
+
+import static org.elasticsearch.common.settings.Settings.settingsBuilder;
+import static org.hamcrest.Matchers.instanceOf;
+
+/**
+ * Checks that the token filter registered as "phonetic" is a {@link PhoneticTokenFilterFactory}.
+ */
+public class SimplePhoneticAnalysisTests extends ElasticsearchTestCase {
+
+    @Test
+    public void testPhoneticTokenFilterFactory() {
+        Settings testSettings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml")
+                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+                .put("path.home", createTempDir())
+                .build();
+        TokenFilterFactory phoneticFilter = testSimpleConfiguration(testSettings).tokenFilter("phonetic");
+        MatcherAssert.assertThat(phoneticFilter, instanceOf(PhoneticTokenFilterFactory.class));
+    }
+
+    // Builds a parent injector (settings, environment, indices analysis) and a child injector for index "test".
+    private AnalysisService testSimpleConfiguration(Settings settings) {
+        Index testIndex = new Index("test");
+
+        Injector nodeInjector = new ModulesBuilder().add(new SettingsModule(settings),
+                new EnvironmentModule(new Environment(settings)),
+                new IndicesAnalysisModule()).createInjector();
+        Injector indexInjector = new ModulesBuilder().add(
+                new IndexSettingsModule(testIndex, settings),
+                new IndexNameModule(testIndex),
+                new AnalysisModule(settings, nodeInjector.getInstance(IndicesAnalysisService.class))
+                .addProcessor(new PhoneticAnalysisBinderProcessor())).createChildInjector(nodeInjector);
+
+        return indexInjector.getInstance(AnalysisService.class);
+    }
+}
--- a/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java
+++ b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java
@ -0,0 +1,108 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.plugins.PluginsService;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.notNullValue;
+
+/**
+ * Integration tests for a "phonetic" token filter using the metaphone encoder on a single-data-node cluster.
+ */
+@ElasticsearchIntegrationTest.ClusterScope(numDataNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE)
+public class SimplePhoneticIntegrationTests extends ElasticsearchIntegrationTest {
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return Settings.builder()
+                .put(super.nodeSettings(nodeOrdinal))
+                .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true)
+                .build();
+    }
+
+    @Override
+    public Settings indexSettings() {
+        return Settings.builder()
+                .put(super.indexSettings())
+                .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
+                .putArray("index.analysis.analyzer.my_analyzer.filter", "standard", "lowercase", "my_metaphone")
+                .put("index.analysis.filter.my_metaphone.type", "phonetic")
+                .put("index.analysis.filter.my_metaphone.encoder", "metaphone")
+                .put("index.analysis.filter.my_metaphone.replace", false)
+                .build();
+    }
+
+    @Test
+    public void testPhoneticAnalyzer() throws ExecutionException, InterruptedException {
+        createIndex("test");
+        ensureGreen("test");
+        AnalyzeResponse analyzed = client().admin().indices()
+                .prepareAnalyze("hello world")
+                .setIndex("test")
+                .setAnalyzer("my_analyzer")
+                .execute().get();
+        // expected output: each metaphone code followed by the term it was derived from
+        assertThat(analyzed, notNullValue());
+        assertThat(analyzed.getTokens().size(), is(4));
+        assertThat(analyzed.getTokens().get(0).getTerm(), is("HL"));
+        assertThat(analyzed.getTokens().get(1).getTerm(), is("hello"));
+        assertThat(analyzed.getTokens().get(2).getTerm(), is("WRLT"));
+        assertThat(analyzed.getTokens().get(3).getTerm(), is("world"));
+    }
+
+    @Test
+    public void testPhoneticAnalyzerInMapping() throws ExecutionException, InterruptedException, IOException {
+        createIndex("test");
+        ensureGreen("test");
+        final XContentBuilder fooMapping = jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("foo")
+                        .field("type", "string")
+                        .field("analyzer", "my_analyzer")
+                    .endObject()
+                .endObject()
+            .endObject()
+            .endObject();
+
+        client().admin().indices().preparePutMapping("test").setType("type").setSource(fooMapping).get();
+
+        index("test", "type", "1", "foo", "hello world");
+        refresh();
+        // the query term "helllo" differs from the indexed "hello"; one hit is still expected
+        SearchResponse found = client().prepareSearch("test")
+                .setQuery(QueryBuilders.matchQuery("foo", "helllo"))
+                .execute().actionGet();
+
+        assertThat(found.getHits().getTotalHits(), is(1L));
+    }
+}
--- a/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml
+++ b/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml
@ -0,0 +1,30 @@
+index:
+  analysis:
+    filter:
+      doublemetaphonefilter:
+        type: phonetic
+        encoder: doublemetaphone
+      metaphonefilter:
+        type: phonetic
+        encoder: metaphone
+      soundexfilter:
+        type: phonetic
+        encoder: soundex
+      refinedsoundexfilter:
+        type: phonetic
+        encoder: refinedsoundex
+      caverphonefilter:
+        type: phonetic
+        encoder: caverphone
+      beidermorsefilter:
+        type: phonetic
+        encoder: beidermorse
+      koelnerphonetikfilter:
+        type: phonetic
+        encoder: koelnerphonetik
+      haasephonetikfilter:
+        type: phonetic
+        encoder: haasephonetik
+      nysiisfilter:
+        type: phonetic
+        encoder: nysiis