diff --git a/modules/analysis/CHANGES.txt b/modules/analysis/CHANGES.txt
index 8ba174d695d..fcbba83b34f 100644
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@@ -27,11 +27,14 @@ New Features
with text contained in the required words (inverse of StopFilter).
- o.a.l.analysis.miscellaneous.HyphenatedWordsFilter: A TokenFilter that puts
hyphenated words broken into two lines back together.
+ - o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
+ capitalization rules to tokens.
- o.a.l.analysis.pattern: Package for pattern-based analysis, containing a
CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
- o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
synonyms.
- (... in progress)
+ - o.a.l.analysis.phonetic: Package for phonetic search, containing various
+ phonetic encoders such as Double Metaphone.
* LUCENE-2413: Consolidated all Lucene analyzers into common.
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
@@ -60,7 +63,6 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- ... (in progress)
Build
diff --git a/modules/analysis/NOTICE.txt b/modules/analysis/NOTICE.txt
index 8b13cc06746..6abde9313c7 100644
--- a/modules/analysis/NOTICE.txt
+++ b/modules/analysis/NOTICE.txt
@@ -4,6 +4,10 @@ Copyright 2006 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+ - Apache Commons
+
The snowball stemmers in
common/src/java/net/sf/snowball
were developed by Martin Porter and Richard Boulton.
diff --git a/modules/analysis/README.txt b/modules/analysis/README.txt
index 53d3c34df90..85b4e9392c3 100644
--- a/modules/analysis/README.txt
+++ b/modules/analysis/README.txt
@@ -20,7 +20,12 @@ lucene-analyzers-common-XX.jar
lucene-analyzers-icu-XX.jar
An add-on analysis library that provides improved Unicode support via
International Components for Unicode (ICU). Note: this module depends on
- the ICU4j jar file (version > 4.4.0)
+ the ICU4j jar file (version >= 4.4.0)
+
+lucene-analyzers-phonetic-XX.jar
+ An add-on analysis library that provides phonetic encoders via Apache
+ Commons Codec. Note: this module depends on the commons-codec jar
+ file (version >= 1.4)
lucene-analyzers-smartcn-XX.jar
An add-on analysis library that provides word segmentation for Simplified
@@ -32,12 +37,14 @@ lucene-analyzers-stempel-XX.jar
common/src/java
icu/src/java
+phonetic/src/java
smartcn/src/java
stempel/src/java
- The source code for the four libraries.
+ The source code for the five libraries.
common/src/test
icu/src/test
+phonetic/src/test
smartcn/src/test
stempel/src/test
- Unit tests for the four libraries.
+ Unit tests for the five libraries.
diff --git a/modules/analysis/build.xml b/modules/analysis/build.xml
index 750cfa90a26..599442d153f 100644
--- a/modules/analysis/build.xml
+++ b/modules/analysis/build.xml
@@ -35,6 +35,10 @@
+
+
+
+
@@ -44,29 +48,33 @@
-
+
+
+
+
+
@@ -76,6 +84,7 @@
+
@@ -83,6 +92,7 @@
+
@@ -90,6 +100,7 @@
+
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
new file mode 100644
index 00000000000..a41314ed891
--- /dev/null
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@@ -0,0 +1,181 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+/**
+ * A filter to apply normal capitalization rules to Tokens. It will make the first letter
+ * capital and the rest lower case.
+ *
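+ * A minimal usage sketch with the default settings (the tokenizer choice here is
+ * illustrative, not required):
+ * <pre>
+ *   TokenStream stream = new WhitespaceTokenizer(version, reader);
+ *   stream = new CapitalizationFilter(stream);
+ * </pre>
+ *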
+ * This filter is particularly useful to build nice looking facet parameters. This filter
+ * is not appropriate if you intend to use a prefix query.
+ */
+public final class CapitalizationFilter extends TokenFilter {
+ public static final int DEFAULT_MAX_WORD_COUNT = Integer.MAX_VALUE;
+ public static final int DEFAULT_MAX_TOKEN_LENGTH = Integer.MAX_VALUE;
+
+ private final boolean onlyFirstWord;
+ private final CharArraySet keep;
+ private final boolean forceFirstLetter;
+ private final Collection<char[]> okPrefix;
+
+ private final int minWordLength;
+ private final int maxWordCount;
+ private final int maxTokenLength;
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ /**
+ * Creates a CapitalizationFilter with the default parameters.
+ *
+ * Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
+ * CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
+ */
+ public CapitalizationFilter(TokenStream in) {
+ this(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+ }
+
+ /**
+ * Creates a CapitalizationFilter with the specified parameters.
+ * @param in input tokenstream
+ * @param onlyFirstWord if true, capitalize only the first word and lowercase the rest;
+ * if false, capitalize every word.
+ * @param keep a keep word list. Words in this set are left unchanged (except that
+ * forceFirstLetter may still apply to the first word).
+ * @param forceFirstLetter Force the first letter to be capitalized even if it is in the keep list.
+ * @param okPrefix do not change word capitalization if a word begins with something in this list.
+ * @param minWordLength how long the word needs to be to get capitalization applied. If the
+ * minWordLength is 3, "and" becomes "And" but "or" stays "or".
+ * @param maxWordCount if the token contains more than maxWordCount words, the capitalization is
+ * assumed to be correct.
+ * @param maxTokenLength tokens at least this long are passed through unchanged (their
+ * capitalization is assumed to be correct).
+ */
+ public CapitalizationFilter(TokenStream in, boolean onlyFirstWord, CharArraySet keep,
+ boolean forceFirstLetter, Collection<char[]> okPrefix, int minWordLength,
+ int maxWordCount, int maxTokenLength) {
+ super(in);
+ this.onlyFirstWord = onlyFirstWord;
+ this.keep = keep;
+ this.forceFirstLetter = forceFirstLetter;
+ this.okPrefix = okPrefix;
+ this.minWordLength = minWordLength;
+ this.maxWordCount = maxWordCount;
+ this.maxTokenLength = maxTokenLength;
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (!input.incrementToken()) return false;
+
+ char[] termBuffer = termAtt.buffer();
+ int termBufferLength = termAtt.length();
+ char[] backup = null;
+
+ if (maxWordCount < DEFAULT_MAX_WORD_COUNT) {
+ //make a backup in case we exceed the word count
+ backup = new char[termBufferLength];
+ System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
+ }
+
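+ // tokens at least maxTokenLength long are passed through without any capitalization changes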
+ if (termBufferLength < maxTokenLength) {
+ int wordCount = 0;
+
+ int lastWordStart = 0;
+ for (int i = 0; i < termBufferLength; i++) {
+ char c = termBuffer[i];
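+ // whitespace/control characters and '.' delimit words within the token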
+ if (c <= ' ' || c == '.') {
+ int len = i - lastWordStart;
+ if (len > 0) {
+ processWord(termBuffer, lastWordStart, len, wordCount++);
+ lastWordStart = i + 1;
+ i++;
+ }
+ }
+ }
+
+ // process the last word
+ if (lastWordStart < termBufferLength) {
+ processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+ }
+
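+ // the token contained more words than maxWordCount: restore the original term from the backup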
+ if (wordCount > maxWordCount) {
+ termAtt.copyBuffer(backup, 0, termBufferLength);
+ }
+ }
+
+ return true;
+ }
+
+ private void processWord(char[] buffer, int offset, int length, int wordCount) {
+ if (length < 1) {
+ return;
+ }
+
+ if (onlyFirstWord && wordCount > 0) {
+ for (int i = 0; i < length; i++) {
+ buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
+
+ }
+ return;
+ }
+
+ if (keep != null && keep.contains(buffer, offset, length)) {
+ if (wordCount == 0 && forceFirstLetter) {
+ buffer[offset] = Character.toUpperCase(buffer[offset]);
+ }
+ return;
+ }
+
+ if (length < minWordLength) {
+ return;
+ }
+
+ if (okPrefix != null) {
+ for (char[] prefix : okPrefix) {
+ if (length >= prefix.length) { //don't bother checking if the buffer length is less than the prefix
+ boolean match = true;
+ for (int i = 0; i < prefix.length; i++) {
+ if (prefix[i] != buffer[offset + i]) {
+ match = false;
+ break;
+ }
+ }
+ if (match) {
+ return;
+ }
+ }
+ }
+ }
+
+ // We know it has at least one character
+ buffer[offset] = Character.toUpperCase(buffer[offset]);
+
+ for (int i = 1; i < length; i++) {
+ buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
+ }
+ }
+}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
new file mode 100644
index 00000000000..4d30d4bacb5
--- /dev/null
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
+
+/** Tests {@link CapitalizationFilter} */
+public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
+ public void testCapitalization() throws Exception {
+ CharArraySet keep = new CharArraySet(TEST_VERSION_CURRENT,
+ Arrays.asList("and", "the", "it", "BIG"), false);
+
+ assertCapitalizesTo("kiTTEN", new String[] { "Kitten" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesTo("and", new String[] { "And" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesTo("AnD", new String[] { "And" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ //first is not forced, but it's not a keep word, either
+ assertCapitalizesTo("AnD", new String[] { "And" },
+ true, keep, false, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesTo("big", new String[] { "Big" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesTo("BIG", new String[] { "BIG" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesToKeyword("Hello thEre my Name is Ryan", "Hello there my name is ryan",
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ // now each token
+ assertCapitalizesTo("Hello thEre my Name is Ryan",
+ new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" },
+ false, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ // now only the long words
+ assertCapitalizesTo("Hello thEre my Name is Ryan",
+ new String[] { "Hello", "There", "my", "Name", "is", "Ryan" },
+ false, keep, true, null, 3, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ // without prefix
+ assertCapitalizesTo("McKinley",
+ new String[] { "Mckinley" },
+ true, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ // Now try some prefixes
+ List<char[]> okPrefix = new ArrayList<char[]>();
+ okPrefix.add("McK".toCharArray());
+
+ assertCapitalizesTo("McKinley",
+ new String[] { "McKinley" },
+ true, keep, true, okPrefix, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ // now try some stuff with numbers
+ assertCapitalizesTo("1st 2nd third",
+ new String[] { "1st", "2nd", "Third" },
+ false, keep, false, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+
+ assertCapitalizesToKeyword("the The the", "The The the",
+ false, keep, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+ }
+
+ static void assertCapitalizesTo(Tokenizer tokenizer, String expected[],
+ boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
+ Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
+ int maxTokenLength) throws IOException {
+ CapitalizationFilter filter = new CapitalizationFilter(tokenizer, onlyFirstWord, keep,
+ forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
+ assertTokenStreamContents(filter, expected);
+ }
+
+ static void assertCapitalizesTo(String input, String expected[],
+ boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
+ Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
+ int maxTokenLength) throws IOException {
+ assertCapitalizesTo(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ expected, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength,
+ maxWordCount, maxTokenLength);
+ }
+
+ static void assertCapitalizesToKeyword(String input, String expected,
+ boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
+ Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
+ int maxTokenLength) throws IOException {
+ assertCapitalizesTo(new KeywordTokenizer(new StringReader(input)),
+ new String[] { expected }, onlyFirstWord, keep, forceFirstLetter, okPrefix,
+ minWordLength, maxWordCount, maxTokenLength);
+ }
+}
diff --git a/modules/analysis/phonetic/build.xml b/modules/analysis/phonetic/build.xml
new file mode 100644
index 00000000000..9efd18a94b8
--- /dev/null
+++ b/modules/analysis/phonetic/build.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+
+
+ Provides phonetic encoding support via Apache Commons Codec.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ phonetic building dependency ${analyzers-common.jar}
+
+
+
diff --git a/modules/analysis/phonetic/lib/commons-codec-1.4.jar b/modules/analysis/phonetic/lib/commons-codec-1.4.jar
new file mode 100644
index 00000000000..97a58157492
--- /dev/null
+++ b/modules/analysis/phonetic/lib/commons-codec-1.4.jar
@@ -0,0 +1,2 @@
+AnyObjectId[458d432da88b0efeab640c229903fb5aad274044] was removed in git history.
+Apache SVN contains full history.
\ No newline at end of file
diff --git a/modules/analysis/phonetic/pom.xml.template b/modules/analysis/phonetic/pom.xml.template
new file mode 100644
index 00000000000..462c4a1073e
--- /dev/null
+++ b/modules/analysis/phonetic/pom.xml.template
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-contrib</artifactId>
+    <version>@version@</version>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-phonetic</artifactId>
+
+  <name>Lucene Phonetic Filters</name>
+
+  <version>@version@</version>
+
+  <description>Provides phonetic encoding via Commons Codec.</description>
+
+  <packaging>jar</packaging>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>codec</artifactId>
+      <version>${codec-version}</version>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
similarity index 96%
rename from solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
rename to modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
index d384d2c1ece..971c9b4f7a5 100644
--- a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
+++ b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
import java.io.IOException;
import java.util.LinkedList;
@@ -35,7 +35,7 @@ public final class DoubleMetaphoneFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
- protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
+ public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
super(input);
this.encoder.setMaxCodeLen(maxCodeLength);
this.inject = inject;
diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
similarity index 93%
rename from solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
rename to modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
index a6d0a3bbe21..791def825d1 100644
--- a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
+++ b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
import org.apache.commons.codec.Encoder;
import org.apache.lucene.analysis.TokenFilter;
@@ -28,23 +28,19 @@ import java.io.IOException;
/**
* Create tokens for phonetic matches. See:
* http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
- *
- * @version $Id$
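+ *
+ * A minimal usage sketch (the encoder and tokenizer choices are illustrative):
+ * <pre>
+ *   TokenStream stream = new WhitespaceTokenizer(version, reader);
+ *   stream = new PhoneticFilter(stream, new DoubleMetaphone(), true); // inject=true also emits the original token
+ * </pre>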
*/
public final class PhoneticFilter extends TokenFilter
{
protected boolean inject = true;
protected Encoder encoder = null;
- protected String name = null;
protected State save = null;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
- public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
+ public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) {
super(in);
this.encoder = encoder;
- this.name = name;
this.inject = inject;
}
diff --git a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
similarity index 72%
rename from solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java
rename to modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
index 35d03b1378c..e99b9b5f90a 100644
--- a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterTest.java
+++ b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
@@ -14,52 +14,53 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
import java.io.StringReader;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-public class DoubleMetaphoneFilterTest extends BaseTokenTestCase {
+public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase {
public void testSize4FalseInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
assertTokenStreamContents(filter, new String[] { "ANTR" });
}
public void testSize4TrueInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, true);
assertTokenStreamContents(filter, new String[] { "international", "ANTR" });
}
public void testAlternateInjectFalse() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Kuczewski"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Kuczewski"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
assertTokenStreamContents(filter, new String[] { "KSSK", "KXFS" });
}
public void testSize8FalseInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "ANTRNXNL" });
}
public void testNonConvertableStringsWithInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%&"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, true);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
}
public void testNonConvertableStringsWithoutInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%&"));
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
// should have something after the stream
- stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%& hello"));
+ stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%& hello"));
filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" });
}
diff --git a/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
new file mode 100644
index 00000000000..811d1ec1906
--- /dev/null
+++ b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.phonetic;
+
+import java.io.StringReader;
+
+import org.apache.commons.codec.Encoder;
+import org.apache.commons.codec.language.Caverphone;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Tests {@link PhoneticFilter}
+ */
+public class TestPhoneticFilter extends BaseTokenStreamTestCase {
+
+ public void testAlgorithms() throws Exception {
+ assertAlgorithm(new Metaphone(), true, "aaa bbb ccc easgasg",
+ new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+ assertAlgorithm(new Metaphone(), false, "aaa bbb ccc easgasg",
+ new String[] { "A", "B", "KKK", "ESKS" });
+
+ assertAlgorithm(new DoubleMetaphone(), true, "aaa bbb ccc easgasg",
+ new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+ assertAlgorithm(new DoubleMetaphone(), false, "aaa bbb ccc easgasg",
+ new String[] { "A", "PP", "KK", "ASKS" });
+
+ assertAlgorithm(new Soundex(), true, "aaa bbb ccc easgasg",
+ new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+ assertAlgorithm(new Soundex(), false, "aaa bbb ccc easgasg",
+ new String[] { "A000", "B000", "C000", "E220" });
+
+ assertAlgorithm(new RefinedSoundex(), true, "aaa bbb ccc easgasg",
+ new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+ assertAlgorithm(new RefinedSoundex(), false, "aaa bbb ccc easgasg",
+ new String[] { "A0", "B1", "C3", "E034034" });
+
+ assertAlgorithm(new Caverphone(), true, "Darda Karleen Datha Carlene",
+ new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
+ "TTA1111111", "Datha", "KLN1111111", "Carlene" });
+ assertAlgorithm(new Caverphone(), false, "Darda Karleen Datha Carlene",
+ new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+ }
+
+
+ static void assertAlgorithm(Encoder encoder, boolean inject, String input,
+ String[] expected) throws Exception {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new StringReader(input));
+ PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
+ assertTokenStreamContents(filter, expected);
+ }
+}
diff --git a/solr/common-build.xml b/solr/common-build.xml
index 62b79889a00..805bb769cec 100644
--- a/solr/common-build.xml
+++ b/solr/common-build.xml
@@ -147,6 +147,7 @@
+
@@ -162,6 +163,7 @@
+
@@ -181,6 +183,9 @@
+
+
+
@@ -206,6 +211,7 @@
+
diff --git a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
index b2afbfd4937..07ab89ba6a6 100644
--- a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
@@ -17,11 +17,10 @@
package org.apache.solr.analysis;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
import org.apache.lucene.analysis.util.CharArraySet;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -29,11 +28,7 @@ import java.util.Map;
import java.util.StringTokenizer;
/**
- * A filter to apply normal capitalization rules to Tokens. It will make the first letter
- * capital and the rest lower case.
- *
- * This filter is particularly useful to build nice looking facet parameters. This filter
- * is not appropriate if you intend to use a prefix query.
+ * Factory for {@link CapitalizationFilter}.
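+ * <p>
+ * A hypothetical schema.xml sketch (attribute values are illustrative):
+ * <pre>
+ * &lt;filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true" forceFirstLetter="true"/&gt;
+ * </pre>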
*
* The factory takes parameters:
* "onlyFirstWord" - should each word be capitalized or all of the words?
@@ -52,7 +47,6 @@ import java.util.StringTokenizer;
* @since solr 1.3
*/
public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
- public static final int DEFAULT_MAX_WORD_COUNT = Integer.MAX_VALUE;
public static final String KEEP = "keep";
public static final String KEEP_IGNORE_CASE = "keepIgnoreCase";
public static final String OK_PREFIX = "okPrefix";
@@ -68,8 +62,8 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
Collection<char[]> okPrefix = Collections.emptyList(); // for example: McK
int minWordLength = 0; // don't modify capitalization for words shorter than this
- int maxWordCount = DEFAULT_MAX_WORD_COUNT;
- int maxTokenLength = DEFAULT_MAX_WORD_COUNT;
+ int maxWordCount = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
+ int maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;
boolean onlyFirstWord = true;
boolean forceFirstLetter = true; // make sure the first letter is capital even if it is in the keep list
@@ -128,116 +122,8 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
}
}
-
- public void processWord(char[] buffer, int offset, int length, int wordCount) {
- if (length < 1) {
- return;
- }
- if (onlyFirstWord && wordCount > 0) {
- for (int i = 0; i < length; i++) {
- buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
-
- }
- return;
- }
-
- if (keep != null && keep.contains(buffer, offset, length)) {
- if (wordCount == 0 && forceFirstLetter) {
- buffer[offset] = Character.toUpperCase(buffer[offset]);
- }
- return;
- }
- if (length < minWordLength) {
- return;
- }
- for (char[] prefix : okPrefix) {
- if (length >= prefix.length) { //don't bother checking if the buffer length is less than the prefix
- boolean match = true;
- for (int i = 0; i < prefix.length; i++) {
- if (prefix[i] != buffer[offset + i]) {
- match = false;
- break;
- }
- }
- if (match == true) {
- return;
- }
- }
- }
-
- // We know it has at least one character
- /*char[] chars = w.toCharArray();
- StringBuilder word = new StringBuilder( w.length() );
- word.append( Character.toUpperCase( chars[0] ) );*/
- buffer[offset] = Character.toUpperCase(buffer[offset]);
-
- for (int i = 1; i < length; i++) {
- buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
- }
- //return word.toString();
- }
-
public CapitalizationFilter create(TokenStream input) {
- return new CapitalizationFilter(input, this);
+ return new CapitalizationFilter(input, onlyFirstWord, keep,
+ forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
}
}
-
-
-/**
- * This relies on the Factory so that the difficult stuff does not need to be
- * re-initialized each time the filter runs.
- *
- * This is package protected since it is not useful without the Factory
- */
-final class CapitalizationFilter extends TokenFilter {
- private final CapitalizationFilterFactory factory;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
- public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
- super(in);
- this.factory = factory;
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (!input.incrementToken()) return false;
-
- char[] termBuffer = termAtt.buffer();
- int termBufferLength = termAtt.length();
- char[] backup = null;
- if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
- //make a backup in case we exceed the word count
- backup = new char[termBufferLength];
- System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
- }
- if (termBufferLength < factory.maxTokenLength) {
- int wordCount = 0;
-
- int lastWordStart = 0;
- for (int i = 0; i < termBufferLength; i++) {
- char c = termBuffer[i];
- if (c <= ' ' || c == '.') {
- int len = i - lastWordStart;
- if (len > 0) {
- factory.processWord(termBuffer, lastWordStart, len, wordCount++);
- lastWordStart = i + 1;
- i++;
- }
- }
- }
-
- // process the last word
- if (lastWordStart < termBufferLength) {
- factory.processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
- }
-
- if (wordCount > factory.maxWordCount) {
- termAtt.copyBuffer(backup, 0, termBufferLength);
- }
- }
-
- return true;
- }
-
-}
-
diff --git a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
index d7ec11ec8ea..bb72143c56c 100644
--- a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory
{
diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
index 3872417d0ae..b53b9f35841 100644
--- a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
@@ -29,6 +29,7 @@ import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.phonetic.PhoneticFilter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
@@ -96,6 +97,6 @@ public class PhoneticFilterFactory extends BaseTokenFilterFactory
}
public PhoneticFilter create(TokenStream input) {
- return new PhoneticFilter(input,encoder,name,inject);
+ return new PhoneticFilter(input, encoder, inject);
}
}
diff --git a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java b/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
index c61c827ca02..5c155d78317 100644
--- a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
+++ b/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
diff --git a/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java b/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilterFactory.java
similarity index 64%
rename from solr/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java
rename to solr/src/test/org/apache/solr/analysis/TestCapitalizationFilterFactory.java
index 2b1bd10e035..343754bd565 100644
--- a/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilter.java
+++ b/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilterFactory.java
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
*
*/
-public class TestCapitalizationFilter extends BaseTokenTestCase {
+public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
public void testCapitalization() throws Exception
{
@@ -40,74 +40,78 @@ public class TestCapitalizationFilter extends BaseTokenTestCase {
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.init( args );
- char[] termBuffer;
- termBuffer = "kiTTEN".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "Kitten", new String(termBuffer, 0, termBuffer.length));
-
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("kiTTEN"))),
+ new String[] { "Kitten" });
+
factory.forceFirstLetter = true;
- termBuffer = "and".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "And", new String(termBuffer, 0, termBuffer.length));//first is forced
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("and"))),
+ new String[] { "And" });
- termBuffer = "AnD".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "And", new String(termBuffer, 0, termBuffer.length));//first is forced, but it's not a keep word, either
+ //first is forced, but it's not a keep word, either
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
+ new String[] { "And" });
factory.forceFirstLetter = false;
- termBuffer = "AnD".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "And", new String(termBuffer, 0, termBuffer.length)); //first is not forced, but it's not a keep word, either
+
+ //first is not forced, but it's not a keep word, either
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
+ new String[] { "And" });
factory.forceFirstLetter = true;
- termBuffer = "big".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "Big", new String(termBuffer, 0, termBuffer.length));
- termBuffer = "BIG".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "BIG", new String(termBuffer, 0, termBuffer.length));
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"));
- TokenStream stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("big"))),
+ new String[] { "Big" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("BIG"))),
+ new String[] { "BIG" });
+
+ assertTokenStreamContents(factory.create(
+ new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"))),
+ new String[] { "Hello there my name is ryan" });
+
// now each token
factory.onlyFirstWord = false;
- tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
+ new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
// now only the long words
factory.minWordLength = 3;
- tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan" ));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
+ new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
// without prefix
- tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "Mckinley" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
+ new String[] { "Mckinley" });
// Now try some prefixes
factory = new CapitalizationFilterFactory();
args.put( "okPrefix", "McK" ); // all words
factory.init( args );
- tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "McKinley" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
+ new String[] { "McKinley" });
// now try some stuff with numbers
factory.forceFirstLetter = false;
factory.onlyFirstWord = false;
- tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third" ));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "1st", "2nd", "Third" });
+ assertTokenStreamContents(factory.create(
+ new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third"))),
+ new String[] { "1st", "2nd", "Third" });
- factory.forceFirstLetter = true;
- tokenizer = new KeywordTokenizer(new StringReader("the The the" ));
- stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, new String[] { "The The the" });
+ factory.forceFirstLetter = true;
+ assertTokenStreamContents(factory.create(
+ new KeywordTokenizer(new StringReader("the The the"))),
+ new String[] { "The The the" });
}
public void testKeepIgnoreCase() throws Exception {
@@ -118,21 +122,20 @@ public class TestCapitalizationFilter extends BaseTokenTestCase {
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.init( args );
- char[] termBuffer;
- termBuffer = "kiTTEN".toCharArray();
factory.forceFirstLetter = true;
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "KiTTEN", new String(termBuffer, 0, termBuffer.length));
+ assertTokenStreamContents(factory.create(
+ new KeywordTokenizer(new StringReader("kiTTEN"))),
+ new String[] { "KiTTEN" });
factory.forceFirstLetter = false;
- termBuffer = "kiTTEN".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "kiTTEN", new String(termBuffer, 0, termBuffer.length));
+ assertTokenStreamContents(factory.create(
+ new KeywordTokenizer(new StringReader("kiTTEN"))),
+ new String[] { "kiTTEN" });
factory.keep = null;
- termBuffer = "kiTTEN".toCharArray();
- factory.processWord(termBuffer, 0, termBuffer.length, 0 );
- assertEquals( "Kitten", new String(termBuffer, 0, termBuffer.length));
+ assertTokenStreamContents(factory.create(
+ new KeywordTokenizer(new StringReader("kiTTEN"))),
+ new String[] { "Kitten" });
}
/**
diff --git a/solr/src/test/org/apache/solr/analysis/TestPhoneticFilter.java b/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java
similarity index 98%
rename from solr/src/test/org/apache/solr/analysis/TestPhoneticFilter.java
rename to solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java
index c2875beb38a..f9f8cca3f2a 100644
--- a/solr/src/test/org/apache/solr/analysis/TestPhoneticFilter.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* @version $Id$
*/
-public class TestPhoneticFilter extends BaseTokenTestCase {
+public class TestPhoneticFilterFactory extends BaseTokenTestCase {
public void testFactory()
{