diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7fde7b4db3b..8854cbe9fae 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -79,6 +79,9 @@ New Features
* LUCENE-6053: Add Serbian analyzer. (Nikola Smolenski via Robert Muir, Mike McCandless)
+* LUCENE-4400: Add support for new NYSIIS Apache commons phonetic
+ codec (Thomas Neidhart via Mike McCandless)
+
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
index b2620d9bb13..c357f723595 100644
--- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
+++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.phonetic;
*/
import java.io.IOException;
-import java.lang.reflect.Method;
import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -29,6 +29,7 @@ import org.apache.commons.codec.language.Caverphone2;
import org.apache.commons.codec.language.ColognePhonetic;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.TokenStream;
@@ -46,8 +47,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* This takes one required argument, "encoder", and the rest are optional:
*
* - encoder
- required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
- * or "ColognePhonetic" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name either by
- * itself if it already contains a '.' or otherwise as in the same package as these others.
+ * "ColognePhonetic" or "Nysiis" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name
+ * either by itself if it already contains a '.' or otherwise as in the same package as these others.
* - inject
- (default=true) add tokens to the stream with the offset=0
* - maxCodeLength
- The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
* support this then specifying this is an error.
@@ -82,6 +83,7 @@ public class PhoneticFilterFactory extends TokenFilterFactory implements Resourc
registry.put("RefinedSoundex".toUpperCase(Locale.ROOT), RefinedSoundex.class);
registry.put("Caverphone".toUpperCase(Locale.ROOT), Caverphone2.class);
registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
+ registry.put("Nysiis".toUpperCase(Locale.ROOT), Nysiis.class);
}
final boolean inject; //accessed by the test
diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
index a47e64990de..f5aff7a741e 100644
--- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
+++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
@@ -21,7 +21,12 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.commons.codec.Encoder;
-import org.apache.commons.codec.language.*;
+import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -59,6 +64,11 @@ public class TestPhoneticFilter extends BaseTokenStreamTestCase {
"TTA1111111", "Datha", "KLN1111111", "Carlene" });
assertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+
+ assertAlgorithm(new Nysiis(), true, "aaa bbb ccc easgasg",
+ new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
+ assertAlgorithm(new Nysiis(), false, "aaa bbb ccc easgasg",
+ new String[] { "A", "B", "C", "EASGAS" });
}
diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
index c919da4b0f6..70b654d580d 100644
--- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
+++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
@@ -18,14 +18,12 @@ package org.apache.lucene.analysis.phonetic;
*/
import java.io.IOException;
-import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
-import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
@@ -164,6 +162,12 @@ public class TestPhoneticFilterFactory extends BaseTokenStreamTestCase {
"67", "Meir", "862", "Schmidt" });
assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt",
new String[] { "67", "862", "67", "862" });
+
+ assertAlgorithm("Nysiis", "true", "Macintosh Knuth Bart Hurd",
+ new String[] { "MCANT", "Macintosh", "NAT", "Knuth",
+ "BAD", "Bart", "HAD", "Hurd" });
+ assertAlgorithm("Nysiis", "false", "Macintosh Knuth Bart Hurd",
+ new String[] { "MCANT", "NAT", "BAD", "HAD" });
}
/** Test that bogus arguments result in exception */