LUCENE-2413: consolidate remaining solr tokenstreams into modules/analysis

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@957162 13f79535-47bb-0310-9956-ffa450edef68
2010-06-23 11:25:17 +00:00 · 2010-06-23 11:25:17 +00:00 · 8f71031ac8
parent 653c7c160b
commit 8f71031ac8
20 changed files with 604 additions and 199 deletions
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@ -27,11 +27,14 @@ New Features
     with text contained in the required words (inverse of StopFilter).
   - o.a.l.analysis.miscellaneous.HyphenatedWordsFilter: A TokenFilter that puts 
     hyphenated words broken into two lines back together.
   - o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
     capitalization rules to tokens.
   - o.a.l.analysis.pattern: Package for pattern-based analysis, containing a 
     CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
   - o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
     synonyms.
-   (... in progress)
+   - o.a.l.analysis.phonetic: Package for phonetic search, containing various
     phonetic encoders such as Double Metaphone.
  * LUCENE-2413: Consolidated all Lucene analyzers into common.
    - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
@ -60,7 +63,6 @@ New Features
    - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
    - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
    - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
    ... (in progress)
 Build
--- a/modules/analysis/NOTICE.txt
+++ b/modules/analysis/NOTICE.txt
@ -4,6 +4,10 @@ Copyright 2006 The Apache Software Foundation
 This product includes software developed by
 The Apache Software Foundation (http://www.apache.org/).
 Includes software from other Apache Software Foundation projects,
 including, but not limited to:
  - Apache Commons
 The snowball stemmers in
  common/src/java/net/sf/snowball
 were developed by Martin Porter and Richard Boulton.
--- a/modules/analysis/README.txt
+++ b/modules/analysis/README.txt
@ -20,7 +20,12 @@ lucene-analyzers-common-XX.jar
 lucene-analyzers-icu-XX.jar
  An add-on analysis library that provides improved Unicode support via
  International Components for Unicode (ICU). Note: this module depends on
-  the ICU4j jar file (version > 4.4.0)
+  the ICU4j jar file (version >= 4.4.0)
 lucene-analyzers-phonetic-XX.jar
  An add-on analysis library that provides phonetic encoders via Apache
  Commons-Codec. Note: this module depends on the commons-codec jar 
  file (version >= 1.4)
 lucene-analyzers-smartcn-XX.jar
  An add-on analysis library that provides word segmentation for Simplified
@ -32,12 +37,14 @@ lucene-analyzers-stempel-XX.jar
 common/src/java
 icu/src/java
 phonetic/src/java
 smartcn/src/java
 stempel/src/java
-  The source code for the four libraries.
+  The source code for the five libraries.
 common/src/test
 icu/src/test
 phonetic/src/test
 smartcn/src/test
 stempel/src/test
-  Unit tests for the four libraries.
+  Unit tests for the five libraries.
--- a/modules/analysis/build.xml
+++ b/modules/analysis/build.xml
@ -35,6 +35,10 @@
    <ant dir="icu" />
  </target>
  <target name="phonetic">
    <ant dir="phonetic" />
  </target>
  <target name="smartcn">
    <ant dir="smartcn" />
  </target>
@ -44,29 +48,33 @@
  </target>
  <target name="default" depends="compile"/>
-  <target name="compile" depends="common,icu,smartcn,stempel" />
+  <target name="compile" depends="common,icu,phonetic,smartcn,stempel" />
  <target name="clean">
    <ant dir="common" target="clean" />
    <ant dir="icu" target="clean" />
    <ant dir="phonetic" target="clean" />
    <ant dir="smartcn" target="clean" />
    <ant dir="stempel" target="clean" />
  </target>
  <target name="compile-core">
    <ant dir="common" target="compile-core" />
    <ant dir="icu" target="compile-core" />
    <ant dir="phonetic" target="compile-core" />
    <ant dir="smartcn" target="compile-core" />
    <ant dir="stempel" target="compile-core" />
  </target>
  <target name="compile-test">
    <ant dir="common" target="compile-test" />
    <ant dir="icu" target="compile-test" />
    <ant dir="phonetic" target="compile-test" />
    <ant dir="smartcn" target="compile-test" />
    <ant dir="stempel" target="compile-test" />
  </target>
  <target name="test">
    <ant dir="common" target="test" />
    <ant dir="icu" target="test" />
    <ant dir="phonetic" target="test" />
    <ant dir="smartcn" target="test" />
    <ant dir="stempel" target="test" />
  </target>
@ -76,6 +84,7 @@
  <target name="dist-maven" depends="default">
    <ant dir="common" target="dist-maven" />
    <ant dir="icu" target="dist-maven" />
    <ant dir="phonetic" target="dist-maven" />
    <ant dir="smartcn" target="dist-maven" />
    <ant dir="stempel" target="dist-maven" />
  </target>  	
@ -83,6 +92,7 @@
  <target name="javadocs">
    <ant dir="common" target="javadocs" />
    <ant dir="icu" target="javadocs" />
    <ant dir="phonetic" target="javadocs" />
    <ant dir="smartcn" target="javadocs" />
    <ant dir="stempel" target="javadocs" />
  </target>  	
@ -90,6 +100,7 @@
  <target name="javadocs-index.html">
    <ant dir="common" target="javadocs-index.html" />
    <ant dir="icu" target="javadocs-index.html" />
    <ant dir="phonetic" target="javadocs-index.html" />
    <ant dir="smartcn" target="javadocs-index.html" />
    <ant dir="stempel" target="javadocs-index.html" />
  </target>
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@ -0,0 +1,181 @@
 package org.apache.lucene.analysis.miscellaneous;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.Collection;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 /** 
 * A filter to apply normal capitalization rules to Tokens.  It will make the first letter
 * capital and the rest lower case.
 * <p/>
 * This filter is particularly useful to build nice looking facet parameters.  This filter
 * is not appropriate if you intend to use a prefix query.
 */
 public final class CapitalizationFilter extends TokenFilter {
  /** Default for {@code maxWordCount}: effectively no limit on the number of words per token. */
  public static final int DEFAULT_MAX_WORD_COUNT = Integer.MAX_VALUE;
  /** Default for {@code maxTokenLength}: effectively no limit on the token length. */
  public static final int DEFAULT_MAX_TOKEN_LENGTH = Integer.MAX_VALUE;
  private final boolean onlyFirstWord;
  private final CharArraySet keep;
  private final boolean forceFirstLetter;
  private final Collection<char[]> okPrefix;
  private final int minWordLength;
  private final int maxWordCount;
  private final int maxTokenLength;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  /**
   * Creates a CapitalizationFilter with the default parameters.
   * <p>
   * Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
   *   CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
   */
  public CapitalizationFilter(TokenStream in) {
    this(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
  }
  /**
   * Creates a CapitalizationFilter with the specified parameters.
   * @param in input tokenstream
   * @param onlyFirstWord if true, only the first word of a token is capitalized and every
   *                      following word is lower-cased; if false, every word is capitalized.
   * @param keep set of words whose capitalization is left untouched; may be null.
   * @param forceFirstLetter force the first letter of the first word to be capitalized even
   *                         if that word is in the keep set.
   * @param okPrefix do not change word capitalization if a word begins with one of these
   *                 prefixes; may be null.
   * @param minWordLength how long the word needs to be to get capitalization applied.  If the
   *                      minWordLength is 3, "and" becomes "And" but "or" stays "or".
   * @param maxWordCount if the token contains more than maxWordCount words, the capitalization
   *                     is assumed to be correct and the original text is restored.
   * @param maxTokenLength only tokens shorter than this many characters are processed; tokens
   *                       of this length or longer are passed through unchanged.
   */
  public CapitalizationFilter(TokenStream in, boolean onlyFirstWord, CharArraySet keep,
      boolean forceFirstLetter, Collection<char[]> okPrefix, int minWordLength,
      int maxWordCount, int maxTokenLength) {
    super(in);
    this.onlyFirstWord = onlyFirstWord;
    this.keep = keep;
    this.forceFirstLetter = forceFirstLetter;
    this.okPrefix = okPrefix;
    this.minWordLength = minWordLength;
    this.maxWordCount = maxWordCount;
    this.maxTokenLength = maxTokenLength;
  }
  /**
   * Advances the input and capitalizes the current term in place.  Words inside a term are
   * separated by '.' or by any character less than or equal to ' '.
   * @return false when the input is exhausted, true otherwise
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final char[] termBuffer = termAtt.buffer();
    final int termBufferLength = termAtt.length();
    if (termBufferLength >= maxTokenLength) {
      // too long to process: pass the token through untouched
      return true;
    }
    char[] backup = null;
    if (maxWordCount < DEFAULT_MAX_WORD_COUNT) {
      // the buffer is edited in place, so keep a copy in case we exceed the word count
      backup = new char[termBufferLength];
      System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
    }
    int wordCount = 0;
    int lastWordStart = 0;
    for (int i = 0; i < termBufferLength; i++) {
      final char c = termBuffer[i];
      if (c <= ' ' || c == '.') {
        final int len = i - lastWordStart;
        if (len > 0) {
          processWord(termBuffer, lastWordStart, len, wordCount++);
        }
        // Always restart after the delimiter, so that leading or consecutive delimiters
        // are never treated as the first character of the following word.
        lastWordStart = i + 1;
      }
    }
    // process the last word
    if (lastWordStart < termBufferLength) {
      processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
    }
    // backup is non-null here: this branch requires maxWordCount < DEFAULT_MAX_WORD_COUNT
    if (wordCount > maxWordCount) {
      termAtt.copyBuffer(backup, 0, termBufferLength);
    }
    return true;
  }
  /**
   * Applies the capitalization rules to one word of the term buffer, in place.
   * @param buffer term buffer holding the word
   * @param offset index of the first character of the word
   * @param length number of characters in the word
   * @param wordCount zero-based position of the word within the token
   */
  private void processWord(char[] buffer, int offset, int length, int wordCount) {
    if (length < 1) {
      return;
    }
    if (onlyFirstWord && wordCount > 0) {
      // every word after the first one is simply lower-cased
      for (int i = 0; i < length; i++) {
        buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
      }
      return;
    }
    if (keep != null && keep.contains(buffer, offset, length)) {
      if (wordCount == 0 && forceFirstLetter) {
        buffer[offset] = Character.toUpperCase(buffer[offset]);
      }
      return;
    }
    if (length < minWordLength) {
      return;
    }
    if (okPrefix != null) {
      for (char[] prefix : okPrefix) {
        if (startsWith(buffer, offset, length, prefix)) {
          return;
        }
      }
    }
    // We know it has at least one character
    buffer[offset] = Character.toUpperCase(buffer[offset]);
    for (int i = 1; i < length; i++) {
      buffer[offset + i] = Character.toLowerCase(buffer[offset + i]);
    }
  }
  /** Returns true if the word at buffer[offset, offset + length) begins with the given prefix. */
  private static boolean startsWith(char[] buffer, int offset, int length, char[] prefix) {
    if (length < prefix.length) {
      // the word is shorter than the prefix, it cannot match
      return false;
    }
    for (int i = 0; i < prefix.length; i++) {
      if (prefix[i] != buffer[offset + i]) {
        return false;
      }
    }
    return true;
  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@ -0,0 +1,121 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.analysis.miscellaneous;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
 /** Tests {@link CapitalizationFilter} */
 public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
  /**
   * Runs the filter over single words, multi-word keyword tokens, keep words,
   * minimum word lengths, protected prefixes and tokens starting with digits.
   */
  public void testCapitalization() throws Exception {
    // short aliases for the "no limit" settings used by every assertion below
    final int anyWordCount = DEFAULT_MAX_WORD_COUNT;
    final int anyLength = DEFAULT_MAX_TOKEN_LENGTH;
    final CharArraySet keepWords = new CharArraySet(TEST_VERSION_CURRENT,
        Arrays.asList("and", "the", "it", "BIG"), false);
    assertCapitalizesTo("kiTTEN", new String[] { "Kitten" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    assertCapitalizesTo("and", new String[] { "And" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    assertCapitalizesTo("AnD", new String[] { "And" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    // the first letter is not forced here, but "AnD" is not a keep word anyway
    assertCapitalizesTo("AnD", new String[] { "And" },
        true, keepWords, false, null, 0, anyWordCount, anyLength);
    assertCapitalizesTo("big", new String[] { "Big" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    assertCapitalizesTo("BIG", new String[] { "BIG" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    assertCapitalizesToKeyword("Hello thEre my Name is Ryan", "Hello there my name is ryan",
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    // each whitespace-separated token on its own
    assertCapitalizesTo("Hello thEre my Name is Ryan",
        new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" },
        false, keepWords, true, null, 0, anyWordCount, anyLength);
    // only words of at least three characters
    assertCapitalizesTo("Hello thEre my Name is Ryan",
        new String[] { "Hello", "There", "my", "Name", "is", "Ryan" },
        false, keepWords, true, null, 3, anyWordCount, anyLength);
    // no protected prefix configured
    assertCapitalizesTo("McKinley",
        new String[] { "Mckinley" },
        true, keepWords, true, null, 0, anyWordCount, anyLength);
    // same word again, this time with a protected prefix
    List<char[]> protectedPrefixes = new ArrayList<char[]>();
    protectedPrefixes.add("McK".toCharArray());
    assertCapitalizesTo("McKinley",
        new String[] { "McKinley" },
        true, keepWords, true, protectedPrefixes, 0, anyWordCount, anyLength);
    // tokens that start with digits
    assertCapitalizesTo("1st 2nd third",
        new String[] { "1st", "2nd", "Third" },
        false, keepWords, false, null, 0, anyWordCount, anyLength);
    assertCapitalizesToKeyword("the The the", "The The the",
        false, keepWords, true, null, 0, anyWordCount, anyLength);
  }
  /** Wraps the tokenizer in a CapitalizationFilter and checks the emitted terms. */
  static void assertCapitalizesTo(Tokenizer tokenizer, String[] expected,
      boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
      Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
      int maxTokenLength) throws IOException {
    assertTokenStreamContents(
        new CapitalizationFilter(tokenizer, onlyFirstWord, keep, forceFirstLetter,
            okPrefix, minWordLength, maxWordCount, maxTokenLength),
        expected);
  }
  /** Splits the input on whitespace and checks the capitalized form of each token. */
  static void assertCapitalizesTo(String input, String[] expected,
      boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
      Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
      int maxTokenLength) throws IOException {
    Tokenizer whitespace = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    assertCapitalizesTo(whitespace, expected, onlyFirstWord, keep, forceFirstLetter,
        okPrefix, minWordLength, maxWordCount, maxTokenLength);
  }
  /** Treats the whole input as one token and checks its capitalized form. */
  static void assertCapitalizesToKeyword(String input, String expected,
      boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
      Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
      int maxTokenLength) throws IOException {
    Tokenizer keyword = new KeywordTokenizer(new StringReader(input));
    String[] singleToken = new String[] { expected };
    assertCapitalizesTo(keyword, singleToken, onlyFirstWord, keep, forceFirstLetter,
        okPrefix, minWordLength, maxWordCount, maxTokenLength);
  }
 }
--- a/modules/analysis/phonetic/build.xml
+++ b/modules/analysis/phonetic/build.xml
@ -0,0 +1,63 @@
 <?xml version="1.0"?>
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at
        http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
 -->
 <project name="analyzers-phonetic" default="default">
  <description>
  	Provides phonetic encoding support via Apache Commons Codec.
  </description>
  <property name="build.dir" location="../build/phonetic" />
  <property name="dist.dir" location="../dist/phonetic" />
  <path id="additional.dependencies">
    <fileset dir="lib" includes="commons-codec-*.jar"/>
  </path>
  <pathconvert property="project.classpath"
               targetos="unix"
               refid="additional.dependencies"
  />
  <import file="../../../lucene/contrib/contrib-build.xml"/>
  <module-uptodate name="analysis/common" jarfile="../build/common/lucene-analyzers-common-${version}.jar"
    property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
  <path id="classpath">
    <pathelement path="${analyzers-common.jar}"/>
    <path refid="base.classpath"/>
  </path>
  <path id="test.classpath">
  	<pathelement path="${analyzers-common.jar}"/>
    <path refid="classpath"/>
    <pathelement location="../../../lucene/build/classes/test/"/>
  	<pathelement location="../build/common/classes/test/"/>
    <path refid="junit-path"/>
    <pathelement location="${build.dir}/classes/java"/>
  </path>
  <target name="compile-core" depends="build-analyzers-common, common.compile-core" />
  <target name="build-analyzers-common" unless="analyzers-common.uptodate">
    <echo>phonetic building dependency ${analyzers-common.jar}</echo>
    <ant antfile="../common/build.xml" target="default" inheritall="false" dir="../common" />
  </target>
 </project>
--- a/modules/analysis/phonetic/lib/commons-codec-1.4.jar
+++ b/modules/analysis/phonetic/lib/commons-codec-1.4.jar
@ -0,0 +1,2 @@
 AnyObjectId[458d432da88b0efeab640c229903fb5aad274044] was removed in git history.
 Apache SVN contains full history.
--- a/modules/analysis/phonetic/pom.xml.template
+++ b/modules/analysis/phonetic/pom.xml.template
@ -0,0 +1,46 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!--
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
  -->
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-contrib</artifactId>
    <version>@version@</version>
  </parent>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-phonetic</artifactId>
  <name>
    Lucene Phonetic Filters
  </name>
  <version>@version@</version>
  <description>    
  	Provides phonetic encoding via Commons Codec.
  </description>
  <packaging>jar</packaging>
  <dependencies>
    <!-- Apache Commons Codec is published under the commons-codec groupId/artifactId -->
    <dependency>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
      <version>${codec-version}</version>
    </dependency>
  </dependencies>
 </project>
--- a/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
+++ b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
 import java.io.IOException;
 import java.util.LinkedList;
@ -35,7 +35,7 @@ public final class DoubleMetaphoneFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
-  protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
+  public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
    super(input);
    this.encoder.setMaxCodeLen(maxCodeLength);
    this.inject = inject;
--- a/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
+++ b/modules/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
@ -15,7 +15,7 @@
 * limitations under the License.
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
 import org.apache.commons.codec.Encoder;
 import org.apache.lucene.analysis.TokenFilter;
@ -28,23 +28,19 @@ import java.io.IOException;
 /**
 * Create tokens for phonetic matches.  See:
 * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
 *
 * @version $Id$
 */
 public final class PhoneticFilter extends TokenFilter 
 {
  protected boolean inject = true; 
  protected Encoder encoder = null;
  protected String name = null;
  protected State save = null;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
-  public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
+  public PhoneticFilter(TokenStream in, Encoder encoder, boolean inject) {
    super(in);
    this.encoder = encoder;
    this.name = name;
    this.inject = inject;   
  }
--- a/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
+++ b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
@ -14,52 +14,53 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.solr.analysis;
+package org.apache.lucene.analysis.phonetic;
 import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-public class DoubleMetaphoneFilterTest extends BaseTokenTestCase {
+public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase {
  public void testSize4FalseInject() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
    assertTokenStreamContents(filter, new String[] { "ANTR" });
  }
  public void testSize4TrueInject() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 4, true);
    assertTokenStreamContents(filter, new String[] { "international", "ANTR" });
  }
  public void testAlternateInjectFalse() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Kuczewski"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Kuczewski"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
    assertTokenStreamContents(filter, new String[] { "KSSK", "KXFS" });
  }
  public void testSize8FalseInject() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
    assertTokenStreamContents(filter, new String[] { "ANTRNXNL" });
  }
  public void testNonConvertableStringsWithInject() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%&"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 8, true);
    assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
  }
  public void testNonConvertableStringsWithoutInject() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%&"));
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
    assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
    // should have something after the stream
-    stream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("12345 #$%@#^%& hello"));
+    stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%& hello"));
    filter = new DoubleMetaphoneFilter(stream, 8, false);
    assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" });
  }
--- a/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
+++ b/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
@ -0,0 +1,73 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.analysis.phonetic;
 import java.io.StringReader;
 import org.apache.commons.codec.Encoder;
 import org.apache.commons.codec.language.Caverphone;
 import org.apache.commons.codec.language.DoubleMetaphone;
 import org.apache.commons.codec.language.Metaphone;
 import org.apache.commons.codec.language.RefinedSoundex;
 import org.apache.commons.codec.language.Soundex;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 /**
 * Tests {@link PhoneticFilter}
 */
 public class TestPhoneticFilter extends BaseTokenStreamTestCase {
  /**
   * Checks the output of each supported Commons Codec encoder, both with and
   * without injection of the original token.
   */
  public void testAlgorithms() throws Exception {
    final String letters = "aaa bbb ccc easgasg";
    final String names = "Darda Karleen Datha Carlene";
    assertAlgorithm(new Metaphone(), true, letters,
        new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, letters,
        new String[] { "A", "B", "KKK", "ESKS" });
    assertAlgorithm(new DoubleMetaphone(), true, letters,
        new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, letters,
        new String[] { "A", "PP", "KK", "ASKS" });
    assertAlgorithm(new Soundex(), true, letters,
        new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, letters,
        new String[] { "A000", "B000", "C000", "E220" });
    assertAlgorithm(new RefinedSoundex(), true, letters,
        new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, letters,
        new String[] { "A0", "B1", "C3", "E034034" });
    assertAlgorithm(new Caverphone(), true, names,
        new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
          "TTA1111111", "Datha", "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone(), false, names,
        new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
  }
  /**
   * Tokenizes the input on whitespace, runs it through a PhoneticFilter built from
   * the given encoder and inject flag, and checks the emitted terms.
   */
  static void assertAlgorithm(Encoder encoder, boolean inject, String input,
      String[] expected) throws Exception {
    StringReader reader = new StringReader(input);
    Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
    assertTokenStreamContents(new PhoneticFilter(source, encoder, inject), expected);
  }
 }
--- a/solr/common-build.xml
+++ b/solr/common-build.xml
@ -147,6 +147,7 @@
  <path id="lucene.classpath">
    <pathelement location="${common-solr.dir}/../lucene/build/classes/java" />
    <pathelement location="${common-solr.dir}/../modules/analysis/build/common/classes/java" />
    <pathelement location="${common-solr.dir}/../modules/analysis/build/phonetic/classes/java" />
    <pathelement location="${common-solr.dir}/../lucene/build/contrib/highlighter/classes/java" />
    <pathelement location="${common-solr.dir}/../lucene/build/contrib/memory/classes/java" />
    <pathelement location="${common-solr.dir}/../lucene/build/contrib/misc/classes/java" />
@ -162,6 +163,7 @@
      </subant>
      <subant target="jar" inheritall="false" failonerror="true">
        <fileset dir="../modules/analysis/common" includes="build.xml" />
        <fileset dir="../modules/analysis/phonetic" includes="build.xml" />
        <fileset dir="../lucene/contrib/highlighter" includes="build.xml" />
        <fileset dir="../lucene/contrib/memory" includes="build.xml" />
        <fileset dir="../lucene/contrib/misc" includes="build.xml" />
@ -181,6 +183,9 @@
      <fileset dir="../modules/analysis/build/common">
        <include name="lucene-analyzers-common-${version}.jar" />
      </fileset>
      <fileset dir="../modules/analysis/build/phonetic">
        <include name="lucene-analyzers-phonetic-${version}.jar" />
      </fileset>
      <fileset dir="../lucene/build/contrib/highlighter">
        <include name="lucene-highlighter-${version}.jar" />
      </fileset>
@ -206,6 +211,7 @@
    <property name="lucene-compiled" value="true"/>
    <subant target="default">
      <fileset dir="../modules/analysis/common" includes="build.xml"/>
      <fileset dir="../modules/analysis/phonetic" includes="build.xml"/>
      <fileset dir="../lucene/contrib/highlighter" includes="build.xml"/>
      <fileset dir="../lucene/contrib/memory" includes="build.xml"/>
      <fileset dir="../lucene/contrib/misc" includes="build.xml"/>
--- a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
@ -17,11 +17,10 @@
 package org.apache.solr.analysis;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@ -29,11 +28,7 @@ import java.util.Map;
 import java.util.StringTokenizer;
 /**
- * A filter to apply normal capitalization rules to Tokens.  It will make the first letter
+ * Factory for {@link CapitalizationFilter}.
 * capital and the rest lower case.
 * <p/>
 * This filter is particularly useful to build nice looking facet parameters.  This filter
 * is not appropriate if you intend to use a prefix query.
 * <p/>
 * The factory takes parameters:<br/>
 * "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
@ -52,7 +47,6 @@ import java.util.StringTokenizer;
 * @since solr 1.3
 */
 public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
  public static final int DEFAULT_MAX_WORD_COUNT = Integer.MAX_VALUE;
  public static final String KEEP = "keep";
  public static final String KEEP_IGNORE_CASE = "keepIgnoreCase";
  public static final String OK_PREFIX = "okPrefix";
@ -68,8 +62,8 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
  Collection<char[]> okPrefix = Collections.emptyList(); // for Example: McK
  int minWordLength = 0;  // don't modify capitalization for words shorter than this
-  int maxWordCount = DEFAULT_MAX_WORD_COUNT;
+  int maxWordCount = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
-  int maxTokenLength = DEFAULT_MAX_WORD_COUNT;
+  int maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;
  boolean onlyFirstWord = true;
  boolean forceFirstLetter = true; // make sure the first letter is capital even if it is in the keep list
@ -128,116 +122,8 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
    }
  }
  /**
   * Applies the configured capitalization rules, in place, to one word held
   * in {@code buffer[offset .. offset + length)}.
   * <p/>
   * Rules are checked in this order, and the first one that applies wins:
   * <ol>
   *   <li>an empty word is left alone;</li>
   *   <li>with {@code onlyFirstWord} set, every word after the first is
   *       lower-cased entirely;</li>
   *   <li>a word found in {@code keep} is left alone, except that its first
   *       character is upper-cased when it is the first word and
   *       {@code forceFirstLetter} is set;</li>
   *   <li>a word shorter than {@code minWordLength} is left alone;</li>
   *   <li>a word starting with one of the {@code okPrefix} entries is left
   *       alone;</li>
   *   <li>otherwise the first character is upper-cased and the remaining
   *       characters are lower-cased.</li>
   * </ol>
   *
   * @param buffer    characters containing the word; modified in place
   * @param offset    index of the word's first character in {@code buffer}
   * @param length    number of characters in the word
   * @param wordCount zero-based position of this word within its token
   */
  public void processWord(char[] buffer, int offset, int length, int wordCount) {
    if (length < 1) {
      return;
    }

    final int end = offset + length;

    // Words after the first are simply lower-cased when only the first counts.
    if (onlyFirstWord && wordCount > 0) {
      for (int pos = offset; pos < end; pos++) {
        buffer[pos] = Character.toLowerCase(buffer[pos]);
      }
      return;
    }

    // Keep-words stay as written, apart from an optionally forced first letter.
    if (keep != null && keep.contains(buffer, offset, length)) {
      if (wordCount == 0 && forceFirstLetter) {
        buffer[offset] = Character.toUpperCase(buffer[offset]);
      }
      return;
    }

    if (length < minWordLength) {
      return;
    }

    // A word that begins with an accepted prefix keeps its capitalization.
    for (char[] prefix : okPrefix) {
      if (prefix.length > length) {
        continue; // the word is too short to start with this prefix
      }
      int matched = 0;
      while (matched < prefix.length && prefix[matched] == buffer[offset + matched]) {
        matched++;
      }
      if (matched == prefix.length) {
        return;
      }
    }

    // At least one character is present: capitalize it, lower-case the rest.
    buffer[offset] = Character.toUpperCase(buffer[offset]);
    for (int pos = offset + 1; pos < end; pos++) {
      buffer[pos] = Character.toLowerCase(buffer[pos]);
    }
  }
  public CapitalizationFilter create(TokenStream input) {
-    return new CapitalizationFilter(input, this);
+    return new CapitalizationFilter(input, onlyFirstWord, keep, 
      forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
  }
 }
 /**
 * A {@link TokenFilter} that capitalizes the words inside each token by
 * handing them, one at a time, to
 * {@link CapitalizationFilterFactory#processWord}.
 * <p/>
 * This relies on the Factory so that the difficult stuff does not need to be
 * re-initialized each time the filter runs.
 * <p/>
 * This is package protected since it is not useful without the Factory
 */
 final class CapitalizationFilter extends TokenFilter {
  // Supplies maxWordCount, maxTokenLength and the processWord() implementation.
  private final CapitalizationFilterFactory factory;
  // Term text of the current token; its buffer is passed to processWord().
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  /**
   * @param in      the token stream to wrap
   * @param factory the factory whose settings and processWord() are used
   */
  public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
    super(in);
    this.factory = factory;
  }
  /**
   * Advances to the next token and runs the factory's capitalization rules
   * over each word inside it. A word ends at any character {@code <= ' '} or
   * at a {@code '.'}.
   * <p/>
   * Tokens whose length is not below {@code factory.maxTokenLength} are left
   * untouched. When a token turns out to hold more than
   * {@code factory.maxWordCount} words, the saved copy of its original text
   * is copied back into the term attribute.
   *
   * @return false if the wrapped stream has no more tokens, true otherwise
   * @throws IOException propagated from the wrapped stream's incrementToken()
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    char[] termBuffer = termAtt.buffer();
    int termBufferLength = termAtt.length();
    char[] backup = null;
    // A word limit below the default is the only case in which the backup is
    // read later, so the copy is skipped otherwise.
    if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
      //make a backup in case we exceed the word count
      backup = new char[termBufferLength];
      System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
    }
    if (termBufferLength < factory.maxTokenLength) {
      int wordCount = 0;
      int lastWordStart = 0;
      for (int i = 0; i < termBufferLength; i++) {
        char c = termBuffer[i];
        if (c <= ' ' || c == '.') {
          int len = i - lastWordStart;
          // NOTE(review): a delimiter hit while len == 0 (leading or repeated
          // delimiter) does not advance lastWordStart, so that delimiter
          // becomes the first character of the next word span - confirm
          // this is intended.
          if (len > 0) {
            factory.processWord(termBuffer, lastWordStart, len, wordCount++);
            lastWordStart = i + 1;
            // NOTE(review): together with the loop's own increment this skips
            // the character right after the delimiter, so it is never tested
            // as a delimiter itself - confirm this is intended.
            i++;
          }
        }
      }
      // process the last word
      if (lastWordStart < termBufferLength) {
        factory.processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
      }
      // Too many words: undo the in-place edits by restoring the saved text.
      if (wordCount > factory.maxWordCount) {
        termAtt.copyBuffer(backup, 0, termBufferLength);
      }
    }
    return true;
  }
 }
--- a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 import java.util.Map;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
 public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory 
 {
--- a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
@ -29,6 +29,7 @@ import org.apache.commons.codec.language.Metaphone;
 import org.apache.commons.codec.language.RefinedSoundex;
 import org.apache.commons.codec.language.Soundex;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.phonetic.PhoneticFilter;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.StrUtils;
@ -96,6 +97,6 @@ public class PhoneticFilterFactory extends BaseTokenFilterFactory
  }
  public PhoneticFilter create(TokenStream input) {
-    return new PhoneticFilter(input,encoder,name,inject);
+    return new PhoneticFilter(input,encoder,inject);
  }
 }
--- a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
+++ b/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java
@ -22,6 +22,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
--- a/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestCapitalizationFilterFactory.java
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 /**
 * 
 */
-public class TestCapitalizationFilter extends BaseTokenTestCase {
+public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
  public void testCapitalization() throws Exception 
  {
@ -40,74 +40,78 @@ public class TestCapitalizationFilter extends BaseTokenTestCase {
    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
    factory.init( args );
-    char[] termBuffer;
+    assertTokenStreamContents(factory.create(
-    termBuffer = "kiTTEN".toCharArray();
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("kiTTEN"))),
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+        new String[] { "Kitten" });
-    assertEquals( "Kitten",  new String(termBuffer, 0, termBuffer.length));
+    
    factory.forceFirstLetter = true;
-    termBuffer = "and".toCharArray();
+    assertTokenStreamContents(factory.create(
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("and"))),
-    assertEquals( "And",  new String(termBuffer, 0, termBuffer.length));//first is forced
+        new String[] { "And" });
-    termBuffer = "AnD".toCharArray();
+    //first is forced, but it's not a keep word, either
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+    assertTokenStreamContents(factory.create(
-    assertEquals( "And",  new String(termBuffer, 0, termBuffer.length));//first is forced, but it's not a keep word, either
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
        new String[] { "And" });
    factory.forceFirstLetter = false;
-    termBuffer = "AnD".toCharArray();
+
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+    //first is not forced, but it's not a keep word, either
-    assertEquals( "And",  new String(termBuffer, 0, termBuffer.length)); //first is not forced, but it's not a keep word, either
+    assertTokenStreamContents(factory.create(
        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("AnD"))),
        new String[] { "And" });
    factory.forceFirstLetter = true;
    termBuffer = "big".toCharArray();
    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
    assertEquals( "Big",  new String(termBuffer, 0, termBuffer.length));
    termBuffer = "BIG".toCharArray();
    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
    assertEquals( "BIG",  new String(termBuffer, 0, termBuffer.length));
-    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"));
+    assertTokenStreamContents(factory.create(
-    TokenStream stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("big"))),
-    assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
+        new String[] { "Big" });
    assertTokenStreamContents(factory.create(
        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("BIG"))),
        new String[] { "BIG" });
    assertTokenStreamContents(factory.create(
        new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"))),
        new String[] { "Hello there my name is ryan" });
    // now each token
    factory.onlyFirstWord = false;
-    tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
-    assertTokenStreamContents(stream, new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
+        new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
    // now only the long words
    factory.minWordLength = 3;
-    tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan" ));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"))),
-    assertTokenStreamContents(stream, new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
+        new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
    // without prefix
-    tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
-    assertTokenStreamContents(stream, new String[] { "Mckinley" });
+        new String[] { "Mckinley" });
    // Now try some prefixes
    factory = new CapitalizationFilterFactory();
    args.put( "okPrefix", "McK" );  // all words
    factory.init( args );
-    tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley"))),
-    assertTokenStreamContents(stream, new String[] { "McKinley" });
+        new String[] { "McKinley" });
    // now try some stuff with numbers
    factory.forceFirstLetter = false;
    factory.onlyFirstWord = false;
-    tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third" ));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third"))),
-    assertTokenStreamContents(stream, new String[] { "1st", "2nd", "Third" });
+        new String[] { "1st", "2nd", "Third" });
-    factory.forceFirstLetter = true;  
+    factory.forceFirstLetter = true;
-    tokenizer = new KeywordTokenizer(new StringReader("the The the" ));
+    assertTokenStreamContents(factory.create(
-    stream = factory.create(tokenizer);
+        new KeywordTokenizer(new StringReader("the The the"))),
-    assertTokenStreamContents(stream, new String[] { "The The the" });
+        new String[] { "The The the" });
  }
  public void testKeepIgnoreCase() throws Exception {
@ -118,21 +122,20 @@ public class TestCapitalizationFilter extends BaseTokenTestCase {
    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
    factory.init( args );
    char[] termBuffer;
    termBuffer = "kiTTEN".toCharArray();
    factory.forceFirstLetter = true;
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+    assertTokenStreamContents(factory.create(
-    assertEquals( "KiTTEN",  new String(termBuffer, 0, termBuffer.length));
+        new KeywordTokenizer(new StringReader("kiTTEN"))),
        new String[] { "KiTTEN" });
    factory.forceFirstLetter = false;
-    termBuffer = "kiTTEN".toCharArray();
+    assertTokenStreamContents(factory.create(
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+        new KeywordTokenizer(new StringReader("kiTTEN"))),
-    assertEquals( "kiTTEN",  new String(termBuffer, 0, termBuffer.length));
+        new String[] { "kiTTEN" });
    factory.keep = null;
-    termBuffer = "kiTTEN".toCharArray();
+    assertTokenStreamContents(factory.create(
-    factory.processWord(termBuffer, 0, termBuffer.length, 0 );
+        new KeywordTokenizer(new StringReader("kiTTEN"))),
-    assertEquals( "Kitten",  new String(termBuffer, 0, termBuffer.length));
+        new String[] { "Kitten" });
  }
  /**
--- a/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 /**
 * @version $Id$
 */
-public class TestPhoneticFilter extends BaseTokenTestCase {
+public class TestPhoneticFilterFactory extends BaseTokenTestCase {
  public void testFactory()
  {
		`@ -0,0 +1,2 @@`
							`AnyObjectId[458d432da88b0efeab640c229903fb5aad274044] was removed in git history.`
							`Apache SVN contains full history.`