mirror of https://github.com/apache/lucene.git
LUCENE-4095: remove deprecations from trunk (just the easy ones for now)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1344531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 8deb16bcf9
commit bc3a3dc5d4
@@ -137,8 +137,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
-    final Tokenizer source = matchVersion.onOrAfter(Version.LUCENE_31) ?
-        new StandardTokenizer(matchVersion, reader) : new ArabicLetterTokenizer(matchVersion, reader);
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new LowerCaseFilter(matchVersion, source);
     // the order here is important: the stopword list is not normalized!
     result = new StopFilter( matchVersion, result, stopwords);
@@ -1,96 +0,0 @@
-package org.apache.lucene.analysis.ar;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
-
-/**
- * Tokenizer that breaks text into runs of letters and diacritics.
- * <p>
- * The problem with the standard Letter tokenizer is that it fails on diacritics.
- * Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
- * </p>
- * <p>
- * <a name="version"/>
- * You must specify the required {@link Version} compatibility when creating
- * {@link ArabicLetterTokenizer}:
- * <ul>
- * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link #isTokenChar(int)} and
- * {@link #normalize(int)} for details.</li>
- * </ul>
- * @deprecated (3.1) Use {@link StandardTokenizer} instead.
- */
-@Deprecated
-public class ArabicLetterTokenizer extends LetterTokenizer {
-  /**
-   * Construct a new ArabicLetterTokenizer.
-   * @param matchVersion Lucene version
-   * to match See {@link <a href="#version">above</a>}
-   *
-   * @param in
-   *          the input to split up into tokens
-   */
-  public ArabicLetterTokenizer(Version matchVersion, Reader in) {
-    super(matchVersion, in);
-  }
-
-  /**
-   * Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}.
-   *
-   * @param matchVersion
-   *          Lucene version to match See {@link <a href="#version">above</a>}
-   * @param source
-   *          the attribute source to use for this Tokenizer
-   * @param in
-   *          the input to split up into tokens
-   */
-  public ArabicLetterTokenizer(Version matchVersion, AttributeSource source, Reader in) {
-    super(matchVersion, source, in);
-  }
-
-  /**
-   * Construct a new ArabicLetterTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. * @param
-   * matchVersion Lucene version to match See
-   * {@link <a href="#version">above</a>}
-   *
-   * @param factory
-   *          the attribute factory to use for this Tokenizer
-   * @param in
-   *          the input to split up into tokens
-   */
-  public ArabicLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
-    super(matchVersion, factory, in);
-  }
-
-  /**
-   * Allows for Letter category or NonspacingMark category
-   * @see org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int)
-   */
-  @Override
-  protected boolean isTokenChar(int c) {
-    return super.isTokenChar(c) || Character.getType(c) == Character.NON_SPACING_MARK;
-  }
-
-}
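The deprecation note above points to StandardTokenizer as the replacement. A minimal migration sketch for callers that constructed the removed tokenizer directly; the helper class and the LUCENE_50 constant are illustrative, not part of this commit:

import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical helper, not part of this commit.
public class ArabicTokenizerMigration {
  public static Tokenizer create(Reader in) {
    // before: new ArabicLetterTokenizer(matchVersion, in)
    // StandardTokenizer keeps combining marks (diacritics) attached to their
    // base letters, which is what the removed class added on top of LetterTokenizer.
    return new StandardTokenizer(Version.LUCENE_50, in);
  }
}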
@@ -38,14 +38,6 @@ import org.tartarus.snowball.ext.CatalanStemmer;
 
 /**
  * {@link Analyzer} for Catalan.
- * <p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating CatalanAnalyzer:
- * <ul>
- *   <li> As of 3.6, ElisionFilter with a set of Catalan
- *        contractions is used by default.
- * </ul>
  */
 public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   private final CharArraySet stemExclusionSet;

@@ -126,8 +118,8 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter},
+   *         {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override

@@ -135,9 +127,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
-    }
+    result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -89,16 +89,11 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      // run the widthfilter first before bigramming, it sometimes combines characters.
-      TokenStream result = new CJKWidthFilter(source);
-      result = new LowerCaseFilter(matchVersion, result);
-      result = new CJKBigramFilter(result);
-      return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
-    } else {
-      final Tokenizer source = new CJKTokenizer(reader);
-      return new TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
-    }
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    // run the widthfilter first before bigramming, it sometimes combines characters.
+    TokenStream result = new CJKWidthFilter(source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new CJKBigramFilter(result);
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
   }
 }
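The retained branch can also be assembled by hand outside the analyzer. A sketch assuming the LUCENE_50 constant; the wrapper class name is illustrative:

import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical helper mirroring the retained createComponents branch above.
public class CJKChain {
  public static TokenStream build(Reader reader) {
    Tokenizer source = new StandardTokenizer(Version.LUCENE_50, reader);
    TokenStream result = new CJKWidthFilter(source);   // width first: it can combine characters
    result = new LowerCaseFilter(Version.LUCENE_50, result);
    return new CJKBigramFilter(result);                // overlapping CJK bigrams
  }
}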
@@ -1,317 +0,0 @@
-package org.apache.lucene.analysis.cjk;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * CJKTokenizer is designed for Chinese, Japanese, and Korean languages.
- * <p>
- * The tokens returned are every two adjacent characters with overlap match.
- * </p>
- * <p>
- * Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
- * </p>
- * Additionally, the following is applied to Latin text (such as English):
- * <ul>
- * <li>Text is converted to lowercase.
- * <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
- * <li>Full-width forms are converted to half-width forms.
- * </ul>
- * For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
- * please search <a
- * href="http://www.google.com/search?q=word+chinese+segment">google</a>
- *
- * @deprecated Use StandardTokenizer, CJKWidthFilter, CJKBigramFilter, and LowerCaseFilter instead.
- */
-@Deprecated
-public final class CJKTokenizer extends Tokenizer {
-  //~ Static fields/initializers ---------------------------------------------
-  /** Word token type */
-  static final int WORD_TYPE = 0;
-
-  /** Single byte token type */
-  static final int SINGLE_TOKEN_TYPE = 1;
-
-  /** Double byte token type */
-  static final int DOUBLE_TOKEN_TYPE = 2;
-
-  /** Names for token types */
-  static final String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
-
-  /** Max word length */
-  private static final int MAX_WORD_LEN = 255;
-
-  /** buffer size: */
-  private static final int IO_BUFFER_SIZE = 256;
-
-  //~ Instance fields --------------------------------------------------------
-
-  /** word offset, used to imply which character(in ) is parsed */
-  private int offset = 0;
-
-  /** the index used only for ioBuffer */
-  private int bufferIndex = 0;
-
-  /** data length */
-  private int dataLen = 0;
-
-  /**
-   * character buffer, store the characters which are used to compose <br>
-   * the returned Token
-   */
-  private final char[] buffer = new char[MAX_WORD_LEN];
-
-  /**
-   * I/O buffer, used to store the content of the input(one of the <br>
-   * members of Tokenizer)
-   */
-  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
-  /** word type: single=>ASCII  double=>non-ASCII  word=>default */
-  private int tokenType = WORD_TYPE;
-
-  /**
-   * tag: previous character is a cached double-byte character  "C1C2C3C4"
-   * ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened)
-   * C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4"
-   */
-  private boolean preIsTokened = false;
-
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-
-  //~ Constructors -----------------------------------------------------------
-
-  /**
-   * Construct a token stream processing the given input.
-   *
-   * @param in I/O reader
-   */
-  public CJKTokenizer(Reader in) {
-    super(in);
-  }
-
-  public CJKTokenizer(AttributeSource source, Reader in) {
-    super(source, in);
-  }
-
-  public CJKTokenizer(AttributeFactory factory, Reader in) {
-    super(factory, in);
-  }
-
-  //~ Methods ----------------------------------------------------------------
-
-  /**
-   * Returns true for the next token in the stream, or false at EOS.
-   * See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
-   * for detail.
-   *
-   * @return false for end of stream, true otherwise
-   *
-   * @throws java.io.IOException - throw IOException when read error <br>
-   *         happened in the InputStream
-   *
-   */
-  @Override
-  public boolean incrementToken() throws IOException {
-    clearAttributes();
-    /** how many character(s) has been stored in buffer */
-
-    while(true) { // loop until we find a non-empty token
-
-      int length = 0;
-
-      /** the position used to create Token */
-      int start = offset;
-
-      while (true) { // loop until we've found a full token
-        /** current character */
-        char c;
-
-        /** unicode block of current character for detail */
-        Character.UnicodeBlock ub;
-
-        offset++;
-
-        if (bufferIndex >= dataLen) {
-          dataLen = input.read(ioBuffer);
-          bufferIndex = 0;
-        }
-
-        if (dataLen == -1) {
-          if (length > 0) {
-            if (preIsTokened == true) {
-              length = 0;
-              preIsTokened = false;
-            }
-            else{
-              offset--;
-            }
-
-            break;
-          } else {
-            offset--;
-            return false;
-          }
-        } else {
-          //get current character
-          c = ioBuffer[bufferIndex++];
-
-          //get the UnicodeBlock of the current character
-          ub = Character.UnicodeBlock.of(c);
-        }
-
-        //if the current character is ASCII or Extend ASCII
-        if ((ub == Character.UnicodeBlock.BASIC_LATIN)
-            || (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
-           ) {
-          if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
-            int i = (int) c;
-            if (i >= 65281 && i <= 65374) {
-              // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
-              i = i - 65248;
-              c = (char) i;
-            }
-          }
-
-          // if the current character is a letter or "_" "+" "#"
-          if (Character.isLetterOrDigit(c)
-              || ((c == '_') || (c == '+') || (c == '#'))
-             ) {
-            if (length == 0) {
-              // "javaC1C2C3C4linux" <br>
-              //      ^--: the current character begin to token the ASCII
-              // letter
-              start = offset - 1;
-            } else if (tokenType == DOUBLE_TOKEN_TYPE) {
-              // "javaC1C2C3C4linux" <br>
-              //              ^--: the previous non-ASCII
-              // : the current character
-              offset--;
-              bufferIndex--;
-
-              if (preIsTokened == true) {
-                // there is only one non-ASCII has been stored
-                length = 0;
-                preIsTokened = false;
-                break;
-              } else {
-                break;
-              }
-            }
-
-            // store the LowerCase(c) in the buffer
-            buffer[length++] = Character.toLowerCase(c);
-            tokenType = SINGLE_TOKEN_TYPE;
-
-            // break the procedure if buffer overflowed!
-            if (length == MAX_WORD_LEN) {
-              break;
-            }
-          } else if (length > 0) {
-            if (preIsTokened == true) {
-              length = 0;
-              preIsTokened = false;
-            } else {
-              break;
-            }
-          }
-        } else {
-          // non-ASCII letter, e.g."C1C2C3C4"
-          if (Character.isLetter(c)) {
-            if (length == 0) {
-              start = offset - 1;
-              buffer[length++] = c;
-              tokenType = DOUBLE_TOKEN_TYPE;
-            } else {
-              if (tokenType == SINGLE_TOKEN_TYPE) {
-                offset--;
-                bufferIndex--;
-
-                //return the previous ASCII characters
-                break;
-              } else {
-                buffer[length++] = c;
-                tokenType = DOUBLE_TOKEN_TYPE;
-
-                if (length == 2) {
-                  offset--;
-                  bufferIndex--;
-                  preIsTokened = true;
-
-                  break;
-                }
-              }
-            }
-          } else if (length > 0) {
-            if (preIsTokened == true) {
-              // empty the buffer
-              length = 0;
-              preIsTokened = false;
-            } else {
-              break;
-            }
-          }
-        }
-      }
-
-      if (length > 0) {
-        termAtt.copyBuffer(buffer, 0, length);
-        offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
-        typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
-        return true;
-      } else if (dataLen == -1) {
-        offset--;
-        return false;
-      }
-
-      // Cycle back and try for the next token (don't
-      // return an empty string)
-    }
-  }
-
-  @Override
-  public final void end() {
-    // set final offset
-    final int finalOffset = correctOffset(offset);
-    this.offsetAtt.setOffset(finalOffset, finalOffset);
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    offset = bufferIndex = dataLen = 0;
-    preIsTokened = false;
-    tokenType = WORD_TYPE;
-  }
-
-  @Override
-  public void reset(Reader reader) throws IOException {
-    super.reset(reader);
-    reset();
-  }
-}
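For reference, a hedged consumer sketch that prints the terms a TokenStream produces, which makes the overlapping bigrams described in the removed javadoc ("java C1C2C3C4" -> "java" "C1C2" "C2C3" "C3C4") easy to observe; the class name is illustrative:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical utility, not part of this commit.
public class TokenPrinter {
  public static void print(TokenStream ts) throws Exception {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                        // consumers must reset before the first increment
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();                          // records the final offset state
    ts.close();
  }
}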
@@ -1,50 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
-
-/**
- * An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and
- * filters with {@link ChineseFilter}
- * @deprecated (3.1) Use {@link StandardAnalyzer} instead, which has the same functionality.
- * This analyzer will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseAnalyzer extends Analyzer {
-
-  /**
-   * Creates
-   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
-   * used to tokenize all the text in the provided {@link Reader}.
-   *
-   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
-   *         built from a {@link ChineseTokenizer} filtered with
-   *         {@link ChineseFilter}
-   */
-  @Override
-  protected TokenStreamComponents createComponents(String fieldName,
-      Reader reader) {
-    final Tokenizer source = new ChineseTokenizer(reader);
-    return new TokenStreamComponents(source, new ChineseFilter(source));
-  }
-}
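Per the @deprecated note, StandardAnalyzer yields the same unigram behavior for Chinese text; a one-line migration sketch (the LUCENE_50 constant is assumed):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

// Hypothetical helper, not part of this commit.
public class ChineseAnalyzerMigration {
  public static Analyzer replacement() {
    // before: new ChineseAnalyzer()
    return new StandardAnalyzer(Version.LUCENE_50);
  }
}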
@@ -1,104 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
-/**
- * A {@link TokenFilter} with a stop word table.
- * <ul>
- * <li>Numeric tokens are removed.
- * <li>English tokens must be larger than 1 character.
- * <li>One Chinese character as one Chinese word.
- * </ul>
- * TO DO:
- * <ol>
- * <li>Add Chinese stop words, such as \ue400
- * <li>Dictionary based Chinese word extraction
- * <li>Intelligent Chinese word extraction
- * </ol>
- *
- * @deprecated (3.1) Use {@link StopFilter} instead, which has the same functionality.
- * This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseFilter extends TokenFilter {
-
-
-  // Only English now, Chinese to be added later.
-  public static final String[] STOP_WORDS = {
-    "and", "are", "as", "at", "be", "but", "by",
-    "for", "if", "in", "into", "is", "it",
-    "no", "not", "of", "on", "or", "such",
-    "that", "the", "their", "then", "there", "these",
-    "they", "this", "to", "was", "will", "with"
-  };
-
-
-  private CharArraySet stopTable;
-
-  private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
-  public ChineseFilter(TokenStream in) {
-    super(in);
-
-    stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-
-    while (input.incrementToken()) {
-      char text[] = termAtt.buffer();
-      int termLength = termAtt.length();
-
-      // why not key off token type here assuming ChineseTokenizer comes first?
-      if (!stopTable.contains(text, 0, termLength)) {
-        switch (Character.getType(text[0])) {
-
-        case Character.LOWERCASE_LETTER:
-        case Character.UPPERCASE_LETTER:
-
-          // English word/token should larger than 1 character.
-          if (termLength>1) {
-            return true;
-          }
-          break;
-        case Character.OTHER_LETTER:
-
-          // One Chinese character as one Chinese word.
-          // Chinese word extraction to be added later here.
-
-          return true;
-        }
-
-      }
-
-    }
-    return false;
-  }
-
-}
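The @deprecated note names StopFilter as the replacement. A sketch that rebuilds the English stop list above as a CharArraySet and applies the standard filter; it covers only the stop-list behavior (the numeric and single-character rules would need something like a LengthFilter), and the LUCENE_50 constant is assumed:

import java.util.Arrays;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Hypothetical helper, not part of this commit.
public class ChineseFilterMigration {
  private static final String[] STOP_WORDS = {
    "and", "are", "as", "at", "be", "but", "by",
    "for", "if", "in", "into", "is", "it",
    "no", "not", "of", "on", "or", "such",
    "that", "the", "their", "then", "there", "these",
    "they", "this", "to", "was", "will", "with"
  };

  public static TokenStream wrap(TokenStream in) {
    CharArraySet stops =
        new CharArraySet(Version.LUCENE_50, Arrays.asList(STOP_WORDS), false);
    return new StopFilter(Version.LUCENE_50, in, stops);
  }
}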
@@ -1,175 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-
-/**
- * Tokenize Chinese text as individual chinese characters.
- *
- * <p>
- * The difference between ChineseTokenizer and
- * CJKTokenizer is that they have different
- * token parsing logic.
- * </p>
- * <p>
- * For example, if the Chinese text
- * "C1C2C3C4" is to be indexed:
- * <ul>
- * <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.
- * <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
- * </ul>
- * </p>
- * <p>
- * Therefore the index created by CJKTokenizer is much larger.
- * </p>
- * <p>
- * The problem is that when searching for C1, C1C2, C1C3,
- * C4C2, C1C2C3 ... the ChineseTokenizer works, but the
- * CJKTokenizer will not work.
- * </p>
- * @deprecated (3.1) Use {@link StandardTokenizer} instead, which has the same functionality.
- * This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseTokenizer extends Tokenizer {
-
-
-  public ChineseTokenizer(Reader in) {
-    super(in);
-  }
-
-  public ChineseTokenizer(AttributeSource source, Reader in) {
-    super(source, in);
-  }
-
-  public ChineseTokenizer(AttributeFactory factory, Reader in) {
-    super(factory, in);
-  }
-
-  private int offset = 0, bufferIndex=0, dataLen=0;
-  private final static int MAX_WORD_LEN = 255;
-  private final static int IO_BUFFER_SIZE = 1024;
-  private final char[] buffer = new char[MAX_WORD_LEN];
-  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
-
-  private int length;
-  private int start;
-
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-  private final void push(char c) {
-
-    if (length == 0) start = offset-1;            // start of token
-    buffer[length++] = Character.toLowerCase(c);  // buffer it
-
-  }
-
-  private final boolean flush() {
-
-    if (length>0) {
-      //System.out.println(new String(buffer, 0,
-      //length));
-      termAtt.copyBuffer(buffer, 0, length);
-      offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
-      return true;
-    }
-    else
-      return false;
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-    clearAttributes();
-
-    length = 0;
-    start = offset;
-
-
-    while (true) {
-
-      final char c;
-      offset++;
-
-      if (bufferIndex >= dataLen) {
-        dataLen = input.read(ioBuffer);
-        bufferIndex = 0;
-      }
-
-      if (dataLen == -1) {
-        offset--;
-        return flush();
-      } else
-        c = ioBuffer[bufferIndex++];
-
-
-      switch(Character.getType(c)) {
-
-      case Character.DECIMAL_DIGIT_NUMBER:
-      case Character.LOWERCASE_LETTER:
-      case Character.UPPERCASE_LETTER:
-        push(c);
-        if (length == MAX_WORD_LEN) return flush();
-        break;
-
-      case Character.OTHER_LETTER:
-        if (length>0) {
-          bufferIndex--;
-          offset--;
-          return flush();
-        }
-        push(c);
-        return flush();
-
-      default:
-        if (length>0) return flush();
-        break;
-      }
-    }
-  }
-
-  @Override
-  public final void end() {
-    // set final offset
-    final int finalOffset = correctOffset(offset);
-    this.offsetAtt.setOffset(finalOffset, finalOffset);
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    offset = bufferIndex = dataLen = 0;
-  }
-
-  @Override
-  public void reset(Reader input) throws IOException {
-    super.reset(input);
-    reset();
-  }
-}
@@ -1,41 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
-    <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-</head>
-<body>
-Analyzer for Chinese, which indexes unigrams (individual chinese characters).
-<p>
-Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
-<ul>
-  <li>StandardAnalyzer: Index unigrams (individual Chinese characters) as a token.
-  <li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
-  <li>SmartChineseAnalyzer (in the analyzers/smartcn package): Index words (attempt to segment Chinese text into words) as tokens.
-</ul>
-
-Example phrase: "我是中国人"
-<ol>
-  <li>StandardAnalyzer: 我-是-中-国-人</li>
-  <li>CJKAnalyzer: 我是-是中-中国-国人</li>
-  <li>SmartChineseAnalyzer: 我-是-中国-人</li>
-</ol>
-</p>
-
-</body>
-</html>
@@ -40,17 +40,6 @@ import java.io.*;
  * all). A default set of stopwords is used unless an alternative list is
  * specified.
  * </p>
- *
- * <a name="version"/>
- * <p>
- * You must specify the required {@link Version} compatibility when creating
- * CzechAnalyzer:
- * <ul>
- *   <li>As of 3.1, words are stemmed with {@link CzechStemFilter}
- *   <li>As of 2.9, StopFilter preserves position increments
- *   <li>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
- *   <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- * </ul>
  */
 public final class CzechAnalyzer extends StopwordAnalyzerBase {
   /** File containing default Czech stopwords. */

@@ -86,8 +75,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
    *
-   * @param matchVersion Lucene version to match See
-   *          {@link <a href="#version">above</a>}
+   * @param matchVersion Lucene version to match
    */
   public CzechAnalyzer(Version matchVersion) {
     this(matchVersion, DefaultSetHolder.DEFAULT_SET);

@@ -96,8 +84,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   /**
    * Builds an analyzer with the given stop words.
    *
-   * @param matchVersion Lucene version to match See
-   *          {@link <a href="#version">above</a>}
+   * @param matchVersion Lucene version to match
    * @param stopwords a stopword set
    */
   public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) {

@@ -108,8 +95,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    * Builds an analyzer with the given stop words and a set of work to be
    * excluded from the {@link CzechStemFilter}.
    *
-   * @param matchVersion Lucene version to match See
-   *          {@link <a href="#version">above</a>}
+   * @param matchVersion Lucene version to match
    * @param stopwords a stopword set
    * @param stemExclusionTable a stemming exclusion set
    */

@@ -127,7 +113,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
-   *         a version is >= LUCENE_31 and a stem exclusion set is provided via
+   *         a stem exclusion set is provided via
    *         {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a
    *         {@link KeywordMarkerFilter} is added before
    *         {@link CzechStemFilter}.

@@ -139,11 +125,9 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
     TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter( matchVersion, result, stopwords);
-    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
-      if(!this.stemExclusionTable.isEmpty())
-        result = new KeywordMarkerFilter(result, stemExclusionTable);
-      result = new CzechStemFilter(result);
-    }
+    if(!this.stemExclusionTable.isEmpty())
+      result = new KeywordMarkerFilter(result, stemExclusionTable);
+    result = new CzechStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
 }
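The stem-exclusion tail that this hunk makes unconditional is a generic pattern: KeywordMarkerFilter flags protected terms, and the stemmer leaves flagged terms untouched. A standalone sketch (LUCENE_50 constant assumed; helper class illustrative):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;

// Hypothetical helper, not part of this commit.
public class CzechStemChain {
  public static TokenStream stem(TokenStream in, CharArraySet stemExclusions) {
    if (!stemExclusions.isEmpty()) {
      in = new KeywordMarkerFilter(in, stemExclusions); // marked terms pass through unstemmed
    }
    return new CzechStemFilter(in);
  }
}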
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.de;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Arrays;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;

@@ -37,7 +36,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.German2Stemmer;
 
 /**
  * {@link Analyzer} for German language.

@@ -49,39 +47,11 @@ import org.tartarus.snowball.ext.German2Stemmer;
  * exclusion list is empty by default.
  * </p>
  *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating GermanAnalyzer:
- * <ul>
- *   <li> As of 3.6, GermanLightStemFilter is used for less aggressive stemming.
- *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, and
- *        Snowball stopwords are used by default.
- *   <li> As of 2.9, StopFilter preserves position
- *        increments
- * </ul>
- *
- * <p><b>NOTE</b>: This class uses the same {@link Version}
- * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class GermanAnalyzer extends StopwordAnalyzerBase {
 
-  /** @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) */
-  @Deprecated
-  private final static String[] GERMAN_STOP_WORDS = {
-    "einer", "eine", "eines", "einem", "einen",
-    "der", "die", "das", "dass", "daß",
-    "du", "er", "sie", "es",
-    "was", "wer", "wie", "wir",
-    "und", "oder", "ohne", "mit",
-    "am", "im", "in", "aus", "auf",
-    "ist", "sein", "war", "wird",
-    "ihr", "ihre", "ihres",
-    "als", "für", "von", "mit",
-    "dich", "dir", "mich", "mir",
-    "mein", "sein", "kein",
-    "durch", "wegen", "wird"
-  };
-
   /** File containing default German stopwords. */
   public final static String DEFAULT_STOPWORD_FILE = "german_stop.txt";
 

@@ -94,10 +64,6 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   }
 
   private static class DefaultSetHolder {
-    /** @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) */
-    @Deprecated
-    private static final CharArraySet DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
-        Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
     private static final CharArraySet DEFAULT_SET;
     static {
       try {

@@ -125,9 +91,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    * {@link #getDefaultStopSet()}.
    */
   public GermanAnalyzer(Version matchVersion) {
-    this(matchVersion,
-        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET
-            : DefaultSetHolder.DEFAULT_SET_30);
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
   }
 
   /**

@@ -176,14 +140,8 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter( matchVersion, result, stopwords);
     result = new KeywordMarkerFilter(result, exclusionSet);
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      result = new GermanNormalizationFilter(result);
-      result = new GermanLightStemFilter(result);
-    } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
-      result = new SnowballFilter(result, new German2Stemmer());
-    } else {
-      result = new GermanStemFilter(result);
-    }
+    result = new GermanNormalizationFilter(result);
+    result = new GermanLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
 }
@@ -37,15 +37,6 @@ import org.apache.lucene.util.Version;
  * that will not be indexed at all).
  * A default set of stopwords is used unless an alternative list is specified.
  * </p>
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating GreekAnalyzer:
- * <ul>
- *   <li> As of 3.1, StandardFilter and GreekStemmer are used by default.
- *   <li> As of 2.9, StopFilter preserves position
- *        increments
- * </ul>
  *
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>

@@ -78,8 +69,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Builds an analyzer with the default stop words.
-   * @param matchVersion Lucene compatibility version,
-   *   See <a href="#version">above</a>
+   * @param matchVersion Lucene compatibility version
    */
   public GreekAnalyzer(Version matchVersion) {
     this(matchVersion, DefaultSetHolder.DEFAULT_SET);

@@ -91,8 +81,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
    * <b>NOTE:</b> The stopwords set should be pre-processed with the logic of
    * {@link GreekLowerCaseFilter} for best results.
    *
-   * @param matchVersion Lucene compatibility version,
-   *   See <a href="#version">above</a>
+   * @param matchVersion Lucene compatibility version
    * @param stopwords a stopword set
    */
   public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) {

@@ -114,11 +103,9 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
-    if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new StandardFilter(matchVersion, result);
+    result = new StandardFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
-    if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new GreekStemFilter(result);
+    result = new GreekStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
 }
@@ -196,7 +196,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"),
       false);
 

@@ -222,7 +222,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ",
           "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ",
          "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ",

@@ -247,7 +247,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ",
           "πεθ", "πικρ", "ποτ", "σιχ", "χ"),
       false);

@@ -274,11 +274,11 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("τρ", "τσ"),
       false);
 
-  private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ",
           "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ",
           "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ",

@@ -337,7 +337,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ",
           "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ",
           "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"),

@@ -425,11 +425,11 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"),
       false);
 
-  private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"),
       false);
 

@@ -449,7 +449,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"),
       false);
 

@@ -483,7 +483,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ",
           "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ",
           "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε",

@@ -521,7 +521,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ",
           "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ",
           "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ",

@@ -530,7 +530,7 @@ public class GreekStemmer {
           "ουλαμ", "ουρ", "π", "τρ", "μ"),
       false);
 
-  private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("ψοφ", "ναυλοχ"),
       false);
 

@@ -567,7 +567,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"),
       false);
 

@@ -587,7 +587,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"),
       false);
 

@@ -601,7 +601,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"),
       false);
 

@@ -625,7 +625,7 @@ public class GreekStemmer {
     return len;
   }
 
-  private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_31,
+  private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_50,
       Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"),
       false);
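All of the GreekStemmer hunks above only swap the Version constant passed to CharArraySet. A brief usage sketch of that class, whose point is allocation-free lookups against a term's char buffer (constant and snippet assumed, not from this commit):

import java.util.Arrays;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Hypothetical snippet, not part of this commit.
public class CharArraySetDemo {
  private static final CharArraySet EXC = new CharArraySet(Version.LUCENE_50,
      Arrays.asList("θ", "δ", "ελ"), false /* matchVersion, entries, ignoreCase */);

  static boolean matches(char[] termBuffer, int length) {
    return EXC.contains(termBuffer, 0, length); // no String allocated per lookup
  }
}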
@@ -94,7 +94,8 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link StandardFilter}, {@link EnglishPossessiveFilter},
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link PorterStemFilter}.
    */

@@ -103,9 +104,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
-    // prior to this we get the classic behavior, standardfilter does it for us.
-    if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new EnglishPossessiveFilter(matchVersion, result);
+    result = new EnglishPossessiveFilter(matchVersion, result);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -26,30 +26,13 @@ import org.apache.lucene.util.Version;
 
 /**
  * TokenFilter that removes possessives (trailing 's) from words.
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating EnglishPossessiveFilter:
- * <ul>
- *    <li> As of 3.6, U+2019 RIGHT SINGLE QUOTATION MARK and
- *         U+FF07 FULLWIDTH APOSTROPHE are also treated as
- *         quotation marks.
- * </ul>
 */
 public final class EnglishPossessiveFilter extends TokenFilter {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private Version matchVersion;
-
-  /**
-   * @deprecated Use {@link #EnglishPossessiveFilter(Version, TokenStream)} instead.
-   */
-  @Deprecated
-  public EnglishPossessiveFilter(TokenStream input) {
-    this(Version.LUCENE_35, input);
-  }
 
+  // NOTE: version now unused
   public EnglishPossessiveFilter(Version version, TokenStream input) {
     super(input);
-    this.matchVersion = version;
   }
 
   @Override

@@ -63,7 +46,8 @@ public final class EnglishPossessiveFilter extends TokenFilter {
 
     if (bufferLength >= 2 &&
         (buffer[bufferLength-2] == '\'' ||
-        (matchVersion.onOrAfter(Version.LUCENE_36) && (buffer[bufferLength-2] == '\u2019' || buffer[bufferLength-2] == '\uFF07'))) &&
+         buffer[bufferLength-2] == '\u2019' ||
+         buffer[bufferLength-2] == '\uFF07') &&
         (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
       termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
     }
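After this change the filter always treats U+0027, U+2019, and U+FF07 as possessive apostrophes. A wiring sketch (LUCENE_50 constant assumed; per the NOTE above, the version argument is accepted but unused):

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical helper, not part of this commit.
public class PossessiveChain {
  public static TokenStream build(String text) {
    Tokenizer source = new StandardTokenizer(Version.LUCENE_50, new StringReader(text));
    // strips a trailing 's regardless of which of the three apostrophes is used
    return new EnglishPossessiveFilter(Version.LUCENE_50, source);
  }
}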
@@ -281,9 +281,9 @@ public class KStemmer {
     DictEntry entry;
 
     CharArrayMap<DictEntry> d = new CharArrayMap<DictEntry>(
-        Version.LUCENE_31, 1000, false);
+        Version.LUCENE_50, 1000, false);
 
-    d = new CharArrayMap<DictEntry>(Version.LUCENE_31, 1000, false);
+    d = new CharArrayMap<DictEntry>(Version.LUCENE_50, 1000, false);
     for (int i = 0; i < exceptionWords.length; i++) {
       if (!d.containsKey(exceptionWords[i])) {
         entry = new DictEntry(exceptionWords[i], true);
@@ -34,17 +34,9 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.SpanishStemmer;
 
 /**
  * {@link Analyzer} for Spanish.
- * <p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating SpanishAnalyzer:
- * <ul>
- *   <li> As of 3.6, SpanishLightStemFilter is used for less aggressive stemming.
- * </ul>
  */
 public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   private final CharArraySet stemExclusionSet;

@@ -132,11 +124,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new KeywordMarkerFilter(result, stemExclusionSet);
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      result = new SpanishLightStemFilter(result);
-    } else {
-      result = new SnowballFilter(result, new SpanishStemmer());
-    }
+    result = new SpanishLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
 }
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;

@@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
 /**
  * {@link Analyzer} for Persian.
  * <p>
- * This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around
+ * This Analyzer uses {@link PersianCharFilter} which implies tokenizing around
  * zero-width non-joiner in addition to whitespace. Some persian-specific variant forms (such as farsi
  * yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
  * </p>

@@ -118,12 +117,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
-    final Tokenizer source;
-    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
-      source = new StandardTokenizer(matchVersion, reader);
-    } else {
-      source = new ArabicLetterTokenizer(matchVersion, reader);
-    }
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new LowerCaseFilter(matchVersion, source);
     result = new ArabicNormalizationFilter(result);
     /* additional persian-specific normalization */

@@ -140,8 +134,6 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
    */
   @Override
   protected Reader initReader(Reader reader) {
-    return matchVersion.onOrAfter(Version.LUCENE_31) ?
-        new PersianCharFilter(CharReader.get(reader)) :
-        reader;
+    return new PersianCharFilter(CharReader.get(reader));
   }
 }
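PersianCharFilter is now applied unconditionally through initReader. It can also be used standalone; the point, per the javadoc above, is that zero-width non-joiner becomes a token break just like whitespace (sketch; class name illustrative):

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.fa.PersianCharFilter;

// Hypothetical helper mirroring the new initReader above.
public class PersianReaderSetup {
  public static Reader wrap(String text) {
    // same wrapping as initReader: every analyzed Reader goes through the char filter
    return new PersianCharFilter(CharReader.get(new StringReader(text)));
  }
}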
@ -35,7 +35,6 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for French language.
|
||||
|
@@ -47,53 +46,11 @@ import java.util.Arrays;
  * exclusion list is empty by default.
  * </p>
  *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating FrenchAnalyzer:
- * <ul>
- *   <li> As of 3.6, FrenchLightStemFilter is used for less aggressive stemming.
- *   <li> As of 3.1, Snowball stemming is done with SnowballFilter,
- *        LowerCaseFilter is used prior to StopFilter, and ElisionFilter and
- *        Snowball stopwords are used by default.
- *   <li> As of 2.9, StopFilter preserves position
- *        increments
- * </ul>
- *
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class FrenchAnalyzer extends StopwordAnalyzerBase {
 
-  /**
-   * Extended list of typical French stopwords.
-   * @deprecated (3.1) remove in Lucene 5.0 (index bw compat)
-   */
-  @Deprecated
-  private final static String[] FRENCH_STOP_WORDS = {
-    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
-    "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
-    "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
-    "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
-    "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
-    "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
-    "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
-    "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
-    "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
-    "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
-    "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
-    "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
-    "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
-    "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
-    "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
-    "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
-    "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
-    "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
-    "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
-    "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
-    "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
-    "été", "être", "ô"
-  };
-
   /** File containing default French stopwords. */
   public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";
 
@@ -111,11 +68,6 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   }
 
   private static class DefaultSetHolder {
-    /** @deprecated (3.1) remove this in Lucene 5.0, index bw compat */
-    @Deprecated
-    static final CharArraySet DEFAULT_STOP_SET_30 = CharArraySet
-        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
-            false));
     static final CharArraySet DEFAULT_STOP_SET;
     static {
       try {
@@ -133,9 +85,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
    */
   public FrenchAnalyzer(Version matchVersion) {
-    this(matchVersion,
-        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
-            : DefaultSetHolder.DEFAULT_STOP_SET_30);
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
   }
 
   /**
@@ -182,30 +132,15 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
-    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
-      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      TokenStream result = new StandardFilter(matchVersion, source);
-      result = new ElisionFilter(matchVersion, result);
-      result = new LowerCaseFilter(matchVersion, result);
-      result = new StopFilter(matchVersion, result, stopwords);
-      if(!excltable.isEmpty())
-        result = new KeywordMarkerFilter(result, excltable);
-      if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-        result = new FrenchLightStemFilter(result);
-      } else {
-        result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
-      }
-      return new TokenStreamComponents(source, result);
-    } else {
-      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      TokenStream result = new StandardFilter(matchVersion, source);
-      result = new StopFilter(matchVersion, result, stopwords);
-      if(!excltable.isEmpty())
-        result = new KeywordMarkerFilter(result, excltable);
-      result = new FrenchStemFilter(result);
-      // Convert to lowercase after stemming!
-      return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
-    }
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new ElisionFilter(matchVersion, result);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    if(!excltable.isEmpty())
+      result = new KeywordMarkerFilter(result, excltable);
+    result = new FrenchLightStemFilter(result);
+    return new TokenStreamComponents(source, result);
   }
 }
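The surviving branch is now the only code path: StandardTokenizer, StandardFilter, ElisionFilter, LowerCaseFilter, StopFilter, optional KeywordMarkerFilter, then FrenchLightStemFilter. A hedged sketch of the user-visible effect (the printed output is an expectation based on the filters' documented behaviour, not something this commit asserts):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.fr.FrenchAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class FrenchAnalyzerDemo {
      public static void main(String[] args) throws Exception {
        FrenchAnalyzer analyzer = new FrenchAnalyzer(Version.LUCENE_50);
        // ElisionFilter should strip the article from "l'avion" before
        // lowercasing, stopping and light stemming.
        TokenStream ts = analyzer.tokenStream("body", new StringReader("l'avion"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // expected: "avion"
        }
        ts.end();
        ts.close();
      }
    }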
@@ -1,90 +0,0 @@
-package org.apache.lucene.analysis.fr;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-import java.io.IOException;
-
-/**
- * A {@link TokenFilter} that stems french words.
- * <p>
- * The used stemmer can be changed at runtime after the
- * filter object is created (as long as it is a {@link FrenchStemmer}).
- * </p>
- * <p>
- * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
- * the {@link KeywordAttribute} before this {@link TokenStream}.
- * </p>
- * @see KeywordMarkerFilter
- * @deprecated (3.1) Use {@link SnowballFilter} with
- * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
- * same functionality. This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class FrenchStemFilter extends TokenFilter {
-
-  /**
-   * The actual token in the input stream.
-   */
-  private FrenchStemmer stemmer = new FrenchStemmer();
-
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
-
-  public FrenchStemFilter( TokenStream in ) {
-    super(in);
-  }
-
-  /**
-   * @return Returns true for the next token in the stream, or false at EOS
-   */
-  @Override
-  public boolean incrementToken() throws IOException {
-    if (input.incrementToken()) {
-      String term = termAtt.toString();
-
-      // Check the exclusion table
-      if (!keywordAttr.isKeyword()) {
-        String s = stemmer.stem( term );
-        // If not stemmed, don't waste the time adjusting the token.
-        if ((s != null) && !s.equals( term ) )
-          termAtt.setEmpty().append(s);
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-  /**
-   * Set a alternative/custom {@link FrenchStemmer} for this filter.
-   */
-  public void setStemmer( FrenchStemmer stemmer ) {
-    if ( stemmer != null ) {
-      this.stemmer = stemmer;
-    }
-  }
-}
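The deprecation notice above names the replacement directly: SnowballFilter wrapping org.tartarus.snowball.ext.FrenchStemmer. A minimal sketch of that chain (Version.LUCENE_50 is an assumed trunk constant); note the filter order, since the Snowball stemmer expects lowercased input, whereas the removed pre-3.1 FrenchAnalyzer path lowercased after FrenchStemFilter:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.snowball.SnowballFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class FrenchSnowballDemo {
      public static void main(String[] args) throws Exception {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_50,
            new StringReader("chevaux"));
        TokenStream result = new LowerCaseFilter(Version.LUCENE_50, source);
        // "Same functionality" as the deleted filter, per its own javadoc.
        result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
        CharTermAttribute term = result.addAttribute(CharTermAttribute.class);
        result.reset();
        while (result.incrementToken()) {
          System.out.println(term.toString());
        }
        result.end();
        result.close();
      }
    }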
@@ -1,712 +0,0 @@
-package org.apache.lucene.analysis.fr;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A stemmer for French words.
- * <p>
- * The algorithm is based on the work of
- * Dr Martin Porter on his snowball project<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
- * (French stemming algorithm) for details
- * </p>
- * @deprecated Use {@link org.tartarus.snowball.ext.FrenchStemmer} instead,
- * which has the same functionality. This filter will be removed in Lucene 4.0
- */
-@Deprecated
-public class FrenchStemmer {
-
-  /**
-   * Buffer for the terms while stemming them.
-   */
-  private StringBuilder sb = new StringBuilder();
-
-  /**
-   * A temporary buffer, used to reconstruct R2
-   */
-  private StringBuilder tb = new StringBuilder();
-
-  /**
-   * Region R0 is equal to the whole buffer
-   */
-  private String R0;
-
-  /**
-   * Region RV
-   * "If the word begins with two vowels, RV is the region after the third letter,
-   * otherwise the region after the first vowel not at the beginning of the word,
-   * or the end of the word if these positions cannot be found."
-   */
-  private String RV;
-
-  /**
-   * Region R1
-   * "R1 is the region after the first non-vowel following a vowel
-   * or is the null region at the end of the word if there is no such non-vowel"
-   */
-  private String R1;
-
-  /**
-   * Region R2
-   * "R2 is the region after the first non-vowel in R1 following a vowel
-   * or is the null region at the end of the word if there is no such non-vowel"
-   */
-  private String R2;
-
-
-  /**
-   * Set to true if we need to perform step 2
-   */
-  private boolean suite;
-
-  /**
-   * Set to true if the buffer was modified
-   */
-  private boolean modified;
-
-
-  /**
-   * Stems the given term to a unique <tt>discriminator</tt>.
-   *
-   * @param term java.langString The term that should be stemmed
-   * @return java.lang.String Discriminator for <tt>term</tt>
-   */
-  protected String stem( String term ) {
-    if ( !isStemmable( term ) ) {
-      return term;
-    }
-
-    // Use lowercase for medium stemming.
-    term = term.toLowerCase();
-
-    // Reset the StringBuilder.
-    sb.delete( 0, sb.length() );
-    sb.insert( 0, term );
-
-    // reset the booleans
-    modified = false;
-    suite = false;
-
-    sb = treatVowels( sb );
-
-    setStrings();
-
-    step1();
-
-    if (!modified || suite)
-    {
-      if (RV != null)
-      {
-        suite = step2a();
-        if (!suite)
-          step2b();
-      }
-    }
-
-    if (modified || suite)
-      step3();
-    else
-      step4();
-
-    step5();
-
-    step6();
-
-    return sb.toString();
-  }
-
-  /**
-   * Sets the search region Strings<br>
-   * it needs to be done each time the buffer was modified
-   */
-  private void setStrings() {
-    // set the strings
-    R0 = sb.toString();
-    RV = retrieveRV( sb );
-    R1 = retrieveR( sb );
-    if ( R1 != null )
-    {
-      tb.delete( 0, tb.length() );
-      tb.insert( 0, R1 );
-      R2 = retrieveR( tb );
-    }
-    else
-      R2 = null;
-  }
-
-  /**
-   * First step of the Porter Algorithm<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step1( ) {
-    String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
-    deleteFrom( R2, suffix );
-
-    replaceFrom( R2, new String[] { "logies", "logie" }, "log" );
-    replaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
-    replaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
-
-    String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
-    deleteButSuffixFromElseReplace( R2, search, "ic", true, R0, "iqU" );
-
-    deleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
-    deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
-    deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
-    deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
-    deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
-
-    deleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
-    deleteFrom( RV, new String[] { "ements", "ement" } );
-
-    deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "abil", false, R0, "abl" );
-    deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "ic", false, R0, "iqU" );
-    deleteButSuffixFrom( R2, new String[] { "ités", "ité" }, "iv", true );
-
-    String[] autre = { "ifs", "ives", "if", "ive" };
-    deleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
-    deleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
-
-    replaceFrom( R0, new String[] { "eaux" }, "eau" );
-
-    replaceFrom( R1, new String[] { "aux" }, "al" );
-
-    deleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
-
-    deleteFrom( R2, new String[] { "eux" } );
-
-    // if one of the next steps is performed, we will need to perform step2a
-    boolean temp = false;
-    temp = replaceFrom( RV, new String[] { "amment" }, "ant" );
-    if (temp == true)
-      suite = true;
-    temp = replaceFrom( RV, new String[] { "emment" }, "ent" );
-    if (temp == true)
-      suite = true;
-    temp = deleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
-    if (temp == true)
-      suite = true;
-
-  }
-
-  /**
-   * Second step (A) of the Porter Algorithm<br>
-   * Will be performed if nothing changed from the first step
-   * or changed were done in the amment, emment, ments or ment suffixes<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   *
-   * @return boolean - true if something changed in the StringBuilder
-   */
-  private boolean step2a() {
-    String[] search = { "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira",
-                        "irent", "iriez", "irez", "irions", "irons", "iront",
-                        "issaIent", "issais", "issantes", "issante", "issants", "issant",
-                        "issait", "issais", "issions", "issons", "issiez", "issez", "issent",
-                        "isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i" };
-    return deleteFromIfTestVowelBeforeIn( RV, search, false, RV );
-  }
-
-  /**
-   * Second step (B) of the Porter Algorithm<br>
-   * Will be performed if step 2 A was performed unsuccessfully<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step2b() {
-    String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
-                        "erons", "eront","erez", "èrent", "era", "ées", "iez",
-                        "ée", "és", "er", "ez", "é" };
-    deleteFrom( RV, suffix );
-
-    String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
-                        "antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant",
-                        "ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
-    deleteButSuffixFrom( RV, search, "e", true );
-
-    deleteFrom( R2, new String[] { "ions" } );
-  }
-
-  /**
-   * Third step of the Porter Algorithm<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step3() {
-    if (sb.length()>0)
-    {
-      char ch = sb.charAt( sb.length()-1 );
-      if (ch == 'Y')
-      {
-        sb.setCharAt( sb.length()-1, 'i' );
-        setStrings();
-      }
-      else if (ch == 'ç')
-      {
-        sb.setCharAt( sb.length()-1, 'c' );
-        setStrings();
-      }
-    }
-  }
-
-  /**
-   * Fourth step of the Porter Algorithm<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step4() {
-    if (sb.length() > 1)
-    {
-      char ch = sb.charAt( sb.length()-1 );
-      if (ch == 's')
-      {
-        char b = sb.charAt( sb.length()-2 );
-        if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
-        {
-          sb.delete( sb.length() - 1, sb.length());
-          setStrings();
-        }
-      }
-    }
-    boolean found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
-    if (!found)
-      found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
-
-    replaceFrom( RV, new String[] { "Ière", "ière", "Ier", "ier" }, "i" );
-    deleteFrom( RV, new String[] { "e" } );
-    deleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
-  }
-
-  /**
-   * Fifth step of the Porter Algorithm<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step5() {
-    if (R0 != null)
-    {
-      if (R0.endsWith("enn") || R0.endsWith("onn") || R0.endsWith("ett") || R0.endsWith("ell") || R0.endsWith("eill"))
-      {
-        sb.delete( sb.length() - 1, sb.length() );
-        setStrings();
-      }
-    }
-  }
-
-  /**
-   * Sixth (and last!) step of the Porter Algorithm<br>
-   * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-   */
-  private void step6() {
-    if (R0!=null && R0.length()>0)
-    {
-      boolean seenVowel = false;
-      boolean seenConson = false;
-      int pos = -1;
-      for (int i = R0.length()-1; i > -1; i--)
-      {
-        char ch = R0.charAt(i);
-        if (isVowel(ch))
-        {
-          if (!seenVowel)
-          {
-            if (ch == 'é' || ch == 'è')
-            {
-              pos = i;
-              break;
-            }
-          }
-          seenVowel = true;
-        }
-        else
-        {
-          if (seenVowel)
-            break;
-          else
-            seenConson = true;
-        }
-      }
-      if (pos > -1 && seenConson && !seenVowel)
-        sb.setCharAt(pos, 'e');
-    }
-  }
-
-  /**
-   * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
-   *
-   * @param source java.lang.String - the primary source zone for search
-   * @param search java.lang.String[] - the strings to search for suppression
-   * @param from java.lang.String - the secondary source zone for search
-   * @param prefix java.lang.String - the prefix to add to the search string to test
-   * @return boolean - true if modified
-   */
-  private boolean deleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
-    boolean found = false;
-    if (source!=null )
-    {
-      for (int i = 0; i < search.length; i++) {
-        if ( source.endsWith( search[i] ))
-        {
-          if (from!=null && from.endsWith( prefix + search[i] ))
-          {
-            sb.delete( sb.length() - search[i].length(), sb.length());
-            found = true;
-            setStrings();
-            break;
-          }
-        }
-      }
-    }
-    return found;
-  }
-
-  /**
-   * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
-   *
-   * @param source java.lang.String - the primary source zone for search
-   * @param search java.lang.String[] - the strings to search for suppression
-   * @param vowel boolean - true if we need a vowel before the search string
-   * @param from java.lang.String - the secondary source zone for search (where vowel could be)
-   * @return boolean - true if modified
-   */
-  private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {
-    boolean found = false;
-    if (source!=null && from!=null)
-    {
-      for (int i = 0; i < search.length; i++) {
-        if ( source.endsWith( search[i] ))
-        {
-          if ((search[i].length() + 1) <= from.length())
-          {
-            boolean test = isVowel(sb.charAt(sb.length()-(search[i].length()+1)));
-            if (test == vowel)
-            {
-              sb.delete( sb.length() - search[i].length(), sb.length());
-              modified = true;
-              found = true;
-              setStrings();
-              break;
-            }
-          }
-        }
-      }
-    }
-    return found;
-  }
-
-  /**
-   * Delete a suffix searched in zone "source" if preceded by the prefix
-   *
-   * @param source java.lang.String - the primary source zone for search
-   * @param search java.lang.String[] - the strings to search for suppression
-   * @param prefix java.lang.String - the prefix to add to the search string to test
-   * @param without boolean - true if it will be deleted even without prefix found
-   */
-  private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {
-    if (source!=null)
-    {
-      for (int i = 0; i < search.length; i++) {
-        if ( source.endsWith( prefix + search[i] ))
-        {
-          sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
-          modified = true;
-          setStrings();
-          break;
-        }
-        else if ( without && source.endsWith( search[i] ))
-        {
-          sb.delete( sb.length() - search[i].length(), sb.length() );
-          modified = true;
-          setStrings();
-          break;
-        }
-      }
-    }
-  }
-
-  /**
-   * Delete a suffix searched in zone "source" if preceded by prefix<br>
-   * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
-   * or delete the suffix if specified
-   *
-   * @param source java.lang.String - the primary source zone for search
-   * @param search java.lang.String[] - the strings to search for suppression
-   * @param prefix java.lang.String - the prefix to add to the search string to test
-   * @param without boolean - true if it will be deleted even without prefix found
-   */
-  private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {
-    if (source!=null)
-    {
-      for (int i = 0; i < search.length; i++) {
-        if ( source.endsWith( prefix + search[i] ))
-        {
-          sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
-          modified = true;
-          setStrings();
-          break;
-        }
-        else if ( from!=null && from.endsWith( prefix + search[i] ))
-        {
-          sb.replace( sb.length() - (prefix.length() + search[i].length()), sb.length(), replace );
-          modified = true;
-          setStrings();
-          break;
-        }
-        else if ( without && source.endsWith( search[i] ))
-        {
-          sb.delete( sb.length() - search[i].length(), sb.length() );
-          modified = true;
-          setStrings();
-          break;
-        }
-      }
-    }
-  }
-
-  /**
-   * Replace a search string with another within the source zone
-   *
-   * @param source java.lang.String - the source zone for search
-   * @param search java.lang.String[] - the strings to search for replacement
-   * @param replace java.lang.String - the replacement string
-   */
-  private boolean replaceFrom( String source, String[] search, String replace ) {
-    boolean found = false;
-    if (source!=null)
-    {
-      for (int i = 0; i < search.length; i++) {
-        if ( source.endsWith( search[i] ))
-        {
-          sb.replace( sb.length() - search[i].length(), sb.length(), replace );
-          modified = true;
-          found = true;
-          setStrings();
-          break;
-        }
-      }
-    }
-    return found;
-  }
-
-  /**
-   * Delete a search string within the source zone
-   *
-   * @param source the source zone for search
-   * @param suffix the strings to search for suppression
-   */
-  private void deleteFrom(String source, String[] suffix ) {
-    if (source!=null)
-    {
-      for (int i = 0; i < suffix.length; i++) {
-        if (source.endsWith( suffix[i] ))
-        {
-          sb.delete( sb.length() - suffix[i].length(), sb.length());
-          modified = true;
-          setStrings();
-          break;
-        }
-      }
-    }
-  }
-
-  /**
-   * Test if a char is a french vowel, including accentuated ones
-   *
-   * @param ch the char to test
-   * @return boolean - true if the char is a vowel
-   */
-  private boolean isVowel(char ch) {
-    switch (ch)
-    {
-      case 'a':
-      case 'e':
-      case 'i':
-      case 'o':
-      case 'u':
-      case 'y':
-      case 'â':
-      case 'à':
-      case 'ë':
-      case 'é':
-      case 'ê':
-      case 'è':
-      case 'ï':
-      case 'î':
-      case 'ô':
-      case 'ü':
-      case 'ù':
-      case 'û':
-        return true;
-      default:
-        return false;
-    }
-  }
-
-  /**
-   * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
-   * "R is the region after the first non-vowel following a vowel
-   * or is the null region at the end of the word if there is no such non-vowel"<br>
-   * @param buffer java.lang.StringBuilder - the in buffer
-   * @return java.lang.String - the resulting string
-   */
-  private String retrieveR( StringBuilder buffer ) {
-    int len = buffer.length();
-    int pos = -1;
-    for (int c = 0; c < len; c++) {
-      if (isVowel( buffer.charAt( c )))
-      {
-        pos = c;
-        break;
-      }
-    }
-    if (pos > -1)
-    {
-      int consonne = -1;
-      for (int c = pos; c < len; c++) {
-        if (!isVowel(buffer.charAt( c )))
-        {
-          consonne = c;
-          break;
-        }
-      }
-      if (consonne > -1 && (consonne+1) < len)
-        return buffer.substring( consonne+1, len );
-      else
-        return null;
-    }
-    else
-      return null;
-  }
-
-  /**
-   * Retrieve the "RV zone" from a buffer an return the corresponding string<br>
-   * "If the word begins with two vowels, RV is the region after the third letter,
-   * otherwise the region after the first vowel not at the beginning of the word,
-   * or the end of the word if these positions cannot be found."<br>
-   * @param buffer java.lang.StringBuilder - the in buffer
-   * @return java.lang.String - the resulting string
-   */
-  private String retrieveRV( StringBuilder buffer ) {
-    int len = buffer.length();
-    if ( buffer.length() > 3)
-    {
-      if ( isVowel(buffer.charAt( 0 )) && isVowel(buffer.charAt( 1 ))) {
-        return buffer.substring(3,len);
-      }
-      else
-      {
-        int pos = 0;
-        for (int c = 1; c < len; c++) {
-          if (isVowel( buffer.charAt( c )))
-          {
-            pos = c;
-            break;
-          }
-        }
-        if ( pos+1 < len )
-          return buffer.substring( pos+1, len );
-        else
-          return null;
-      }
-    }
-    else
-      return null;
-  }
-
-
-
-  /**
-   * Turns u and i preceded AND followed by a vowel to UpperCase<br>
-   * Turns y preceded OR followed by a vowel to UpperCase<br>
-   * Turns u preceded by q to UpperCase<br>
-   *
-   * @param buffer java.util.StringBuilder - the buffer to treat
-   * @return java.util.StringBuilder - the treated buffer
-   */
-  private StringBuilder treatVowels( StringBuilder buffer ) {
-    for ( int c = 0; c < buffer.length(); c++ ) {
-      char ch = buffer.charAt( c );
-
-      if (c == 0) // first char
-      {
-        if (buffer.length()>1)
-        {
-          if (ch == 'y' && isVowel(buffer.charAt( c + 1 )))
-            buffer.setCharAt( c, 'Y' );
-        }
-      }
-      else if (c == buffer.length()-1) // last char
-      {
-        if (ch == 'u' && buffer.charAt( c - 1 ) == 'q')
-          buffer.setCharAt( c, 'U' );
-        if (ch == 'y' && isVowel(buffer.charAt( c - 1 )))
-          buffer.setCharAt( c, 'Y' );
-      }
-      else // other cases
-      {
-        if (ch == 'u')
-        {
-          if (buffer.charAt( c - 1) == 'q')
-            buffer.setCharAt( c, 'U' );
-          else if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
-            buffer.setCharAt( c, 'U' );
-        }
-        if (ch == 'i')
-        {
-          if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
-            buffer.setCharAt( c, 'I' );
-        }
-        if (ch == 'y')
-        {
-          if (isVowel(buffer.charAt( c - 1 )) || isVowel(buffer.charAt( c + 1 )))
-            buffer.setCharAt( c, 'Y' );
-        }
-      }
-    }
-
-    return buffer;
-  }
-
-  /**
-   * Checks a term if it can be processed correctly.
-   *
-   * @return boolean - true if, and only if, the given term consists in letters.
-   */
-  private boolean isStemmable( String term ) {
-    boolean upper = false;
-    int first = -1;
-    for ( int c = 0; c < term.length(); c++ ) {
-      // Discard terms that contain non-letter characters.
-      if ( !Character.isLetter( term.charAt( c ) ) ) {
-        return false;
-      }
-      // Discard terms that contain multiple uppercase letters.
-      if ( Character.isUpperCase( term.charAt( c ) ) ) {
-        if ( upper ) {
-          return false;
-        }
-        // First encountered uppercase letter, set flag and save
-        // position.
-        else {
-          first = c;
-          upper = true;
-        }
-      }
-    }
-    // Discard the term if it contains a single uppercase letter that
-    // is not starting the term.
-    if ( first > 0 ) {
-      return false;
-    }
-    return true;
-  }
-}
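The R0/R1/R2/RV definitions quoted in the deleted stemmer's javadoc come from the Snowball description of the algorithm. A standalone sketch (not Lucene code) of the R1 rule, "the region after the first non-vowel following a vowel, or the null region at the end of the word if there is no such non-vowel":

    public final class R1RegionDemo {
      // French vowels, including accented ones, as in the deleted isVowel().
      private static final String VOWELS = "aeiouyâàëéêèïîôüùû";

      static boolean isVowel(char ch) {
        return VOWELS.indexOf(ch) >= 0;
      }

      static String r1(String word) {
        for (int i = 1; i < word.length(); i++) {
          // first non-vowel that follows a vowel: R1 starts right after it
          if (!isVowel(word.charAt(i)) && isVowel(word.charAt(i - 1))) {
            return word.substring(i + 1);
          }
        }
        return ""; // null region at the end of the word
      }

      public static void main(String[] args) {
        System.out.println(r1("fameusement")); // "eusement" (fam|eusement)
      }
    }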
@@ -29,18 +29,10 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
-import org.apache.lucene.analysis.in.IndicTokenizer;
 import org.apache.lucene.util.Version;
 
 /**
  * Analyzer for Hindi.
- * <p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating HindiAnalyzer:
- * <ul>
- *   <li> As of 3.6, StandardTokenizer is used for tokenization
- * </ul>
  */
 public final class HindiAnalyzer extends StopwordAnalyzerBase {
   private final CharArraySet stemExclusionSet;
@@ -126,12 +118,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
-    final Tokenizer source;
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      source = new StandardTokenizer(matchVersion, reader);
-    } else {
-      source = new IndicTokenizer(matchVersion, reader);
-    }
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new LowerCaseFilter(matchVersion, source);
     if (!stemExclusionSet.isEmpty())
       result = new KeywordMarkerFilter(result, stemExclusionSet);
@@ -1,53 +0,0 @@
-package org.apache.lucene.analysis.in;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizer; // javadocs
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
-
-/**
- * Simple Tokenizer for text in Indian Languages.
- * @deprecated (3.6) Use {@link StandardTokenizer} instead.
- */
-@Deprecated
-public final class IndicTokenizer extends CharTokenizer {
-
-  public IndicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
-    super(matchVersion, factory, input);
-  }
-
-  public IndicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(matchVersion, source, input);
-  }
-
-  public IndicTokenizer(Version matchVersion, Reader input) {
-    super(matchVersion, input);
-  }
-
-  @Override
-  protected boolean isTokenChar(int c) {
-    return Character.isLetter(c)
-        || Character.getType(c) == Character.NON_SPACING_MARK
-        || Character.getType(c) == Character.FORMAT
-        || Character.getType(c) == Character.COMBINING_SPACING_MARK;
-  }
-}
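Per the @deprecated tag, StandardTokenizer is the intended replacement. For code that depended on the exact character classes above, a hypothetical drop-in subclass of CharTokenizer (the name LegacyIndicTokenizer is illustrative, not part of Lucene):

    import java.io.Reader;
    import org.apache.lucene.analysis.util.CharTokenizer;
    import org.apache.lucene.util.Version;

    public final class LegacyIndicTokenizer extends CharTokenizer {
      public LegacyIndicTokenizer(Version matchVersion, Reader input) {
        super(matchVersion, input);
      }

      @Override
      protected boolean isTokenChar(int c) {
        // Same predicate as the deleted class: letters plus combining and
        // format characters, so diacritics stay attached to their token.
        return Character.isLetter(c)
            || Character.getType(c) == Character.NON_SPACING_MARK
            || Character.getType(c) == Character.FORMAT
            || Character.getType(c) == Character.COMBINING_SPACING_MARK;
      }
    }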
@@ -36,19 +36,9 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.ItalianStemmer;
 
 /**
  * {@link Analyzer} for Italian.
- * <p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ItalianAnalyzer:
- * <ul>
- *   <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
- *   <li> As of 3.2, ElisionFilter with a set of Italian
- *        contractions is used by default.
- * </ul>
  */
 public final class ItalianAnalyzer extends StopwordAnalyzerBase {
   private final CharArraySet stemExclusionSet;
@@ -139,18 +129,12 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
-    if (matchVersion.onOrAfter(Version.LUCENE_32)) {
-      result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
-    }
+    result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new KeywordMarkerFilter(result, stemExclusionSet);
-    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      result = new ItalianLightStemFilter(result);
-    } else {
-      result = new SnowballFilter(result, new ItalianStemmer());
-    }
+    result = new ItalianLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
 }
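Elision is now applied on every path, not only for matchVersion >= 3.2. A sketch of that step in isolation; the article set here is a stand-in (ItalianAnalyzer's DEFAULT_ARTICLES is not public API), and the org.apache.lucene.analysis.util location of ElisionFilter is assumed from trunk at the time:

    import java.io.StringReader;
    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.analysis.util.ElisionFilter;
    import org.apache.lucene.util.Version;

    public class ElisionDemo {
      public static void main(String[] args) throws Exception {
        CharArraySet articles = new CharArraySet(Version.LUCENE_50,
            Arrays.asList("dell", "l", "un"), true);
        Tokenizer tok = new StandardTokenizer(Version.LUCENE_50,
            new StringReader("dell'arte"));
        // "dell'arte" -> "arte": the contraction is stripped before
        // lowercasing, stopping and stemming.
        TokenStream ts = new ElisionFilter(Version.LUCENE_50, tok, articles);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }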
@ -1,518 +0,0 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
|
||||
* {@link java.io.Reader}, that can flexibly separate text into terms via a regular expression {@link Pattern}
|
||||
* (with behaviour identical to {@link String#split(String)}),
|
||||
* and that combines the functionality of
|
||||
* {@link org.apache.lucene.analysis.core.LetterTokenizer},
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseTokenizer},
|
||||
* {@link org.apache.lucene.analysis.core.WhitespaceTokenizer},
|
||||
* {@link org.apache.lucene.analysis.core.StopFilter} into a single efficient
|
||||
* multi-purpose class.
|
||||
* <p>
|
||||
* If you are unsure how exactly a regular expression should look like, consider
|
||||
* prototyping by simply trying various expressions on some test texts via
|
||||
* {@link String#split(String)}. Once you are satisfied, give that regex to
|
||||
* PatternAnalyzer. Also see <a target="_blank"
|
||||
* href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
|
||||
* <p>
|
||||
* This class can be considerably faster than the "normal" Lucene tokenizers.
|
||||
* It can also serve as a building block in a compound Lucene
|
||||
* {@link org.apache.lucene.analysis.TokenFilter} chain. For example as in this
|
||||
* stemming example:
|
||||
* <pre>
|
||||
* PatternAnalyzer pat = ...
|
||||
* TokenStream tokenStream = new SnowballFilter(
|
||||
* pat.tokenStream("content", "James is running round in the woods"),
|
||||
* "English"));
|
||||
* </pre>
|
||||
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public final class PatternAnalyzer extends Analyzer {
|
||||
|
||||
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
|
||||
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
|
||||
|
||||
/** <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) */
|
||||
public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
|
||||
|
||||
private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
|
||||
CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
|
||||
Arrays.asList(
|
||||
"a", "about", "above", "across", "adj", "after", "afterwards",
|
||||
"again", "against", "albeit", "all", "almost", "alone", "along",
|
||||
"already", "also", "although", "always", "among", "amongst", "an",
|
||||
"and", "another", "any", "anyhow", "anyone", "anything",
|
||||
"anywhere", "are", "around", "as", "at", "be", "became", "because",
|
||||
"become", "becomes", "becoming", "been", "before", "beforehand",
|
||||
"behind", "being", "below", "beside", "besides", "between",
|
||||
"beyond", "both", "but", "by", "can", "cannot", "co", "could",
|
||||
"down", "during", "each", "eg", "either", "else", "elsewhere",
|
||||
"enough", "etc", "even", "ever", "every", "everyone", "everything",
|
||||
"everywhere", "except", "few", "first", "for", "former",
|
||||
"formerly", "from", "further", "had", "has", "have", "he", "hence",
|
||||
"her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
|
||||
"herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
|
||||
"in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
|
||||
"latter", "latterly", "least", "less", "ltd", "many", "may", "me",
|
||||
"meanwhile", "might", "more", "moreover", "most", "mostly", "much",
|
||||
"must", "my", "myself", "namely", "neither", "never",
|
||||
"nevertheless", "next", "no", "nobody", "none", "noone", "nor",
|
||||
"not", "nothing", "now", "nowhere", "of", "off", "often", "on",
|
||||
"once one", "only", "onto", "or", "other", "others", "otherwise",
|
||||
"our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
|
||||
"rather", "s", "same", "seem", "seemed", "seeming", "seems",
|
||||
"several", "she", "should", "since", "so", "some", "somehow",
|
||||
"someone", "something", "sometime", "sometimes", "somewhere",
|
||||
"still", "such", "t", "than", "that", "the", "their", "them",
|
||||
"themselves", "then", "thence", "there", "thereafter", "thereby",
|
||||
"therefor", "therein", "thereupon", "these", "they", "this",
|
||||
"those", "though", "through", "throughout", "thru", "thus", "to",
|
||||
"together", "too", "toward", "towards", "under", "until", "up",
|
||||
"upon", "us", "very", "via", "was", "we", "well", "were", "what",
|
||||
"whatever", "whatsoever", "when", "whence", "whenever",
|
||||
"whensoever", "where", "whereafter", "whereas", "whereat",
|
||||
"whereby", "wherefrom", "wherein", "whereinto", "whereof",
|
||||
"whereon", "whereto", "whereunto", "whereupon", "wherever",
|
||||
"wherewith", "whether", "which", "whichever", "whichsoever",
|
||||
"while", "whilst", "whither", "who", "whoever", "whole", "whom",
|
||||
"whomever", "whomsoever", "whose", "whosoever", "why", "will",
|
||||
"with", "within", "without", "would", "xsubj", "xcal", "xauthor",
|
||||
"xother ", "xnote", "yet", "you", "your", "yours", "yourself",
|
||||
"yourselves"
|
||||
), true));
|
||||
|
||||
/**
|
||||
* A lower-casing word analyzer with English stop words (can be shared
|
||||
* freely across threads without harm); global per class loader.
|
||||
*/
|
||||
public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
|
||||
Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
|
||||
/**
|
||||
* A lower-casing word analyzer with <b>extended </b> English stop words
|
||||
* (can be shared freely across threads without harm); global per class
|
||||
* loader. The stop words are borrowed from
|
||||
* http://thomas.loc.gov/home/stopwords.html, see
|
||||
* http://thomas.loc.gov/home/all.about.inquery.html
|
||||
*/
|
||||
public static final PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(
|
||||
Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
|
||||
|
||||
private final Pattern pattern;
|
||||
private final boolean toLowerCase;
|
||||
private final CharArraySet stopWords;
|
||||
|
||||
private final Version matchVersion;
|
||||
|
||||
/**
|
||||
* Constructs a new instance with the given parameters.
|
||||
*
|
||||
* @param matchVersion currently does nothing
|
||||
* @param pattern
|
||||
* a regular expression delimiting tokens
|
||||
* @param toLowerCase
|
||||
* if <code>true</code> returns tokens after applying
|
||||
* String.toLowerCase()
|
||||
* @param stopWords
|
||||
* if non-null, ignores all tokens that are contained in the
|
||||
* given stop set (after previously having applied toLowerCase()
|
||||
* if applicable). For example, created via
|
||||
* {@link StopFilter#makeStopSet(Version, String[])}and/or
|
||||
* {@link org.apache.lucene.analysis.util.WordlistLoader}as in
|
||||
* <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
|
||||
* or <a href="http://www.unine.ch/info/clef/">other stop words
|
||||
* lists </a>.
|
||||
*/
|
||||
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, CharArraySet stopWords) {
|
||||
if (pattern == null)
|
||||
throw new IllegalArgumentException("pattern must not be null");
|
||||
|
||||
if (eqPattern(NON_WORD_PATTERN, pattern)) pattern = NON_WORD_PATTERN;
|
||||
else if (eqPattern(WHITESPACE_PATTERN, pattern)) pattern = WHITESPACE_PATTERN;
|
||||
|
||||
if (stopWords != null && stopWords.size() == 0) stopWords = null;
|
||||
|
||||
this.pattern = pattern;
|
||||
this.toLowerCase = toLowerCase;
|
||||
this.stopWords = stopWords;
|
||||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a token stream that tokenizes the given string into token terms
|
||||
* (aka words).
|
||||
*
|
||||
* @param fieldName
|
||||
* the name of the field to tokenize (currently ignored).
|
||||
* @param reader
|
||||
* reader (e.g. charfilter) of the original text. can be null.
|
||||
* @param text
|
||||
* the string to tokenize
|
||||
* @return a new token stream
|
||||
*/
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader, String text) {
|
||||
// Ideally the Analyzer superclass should have a method with the same signature,
|
||||
// with a default impl that simply delegates to the StringReader flavour.
|
||||
if (text == null)
|
||||
throw new IllegalArgumentException("text must not be null");
|
||||
|
||||
if (pattern == NON_WORD_PATTERN) { // fast path
|
||||
return new TokenStreamComponents(new FastStringTokenizer(reader, text, true, toLowerCase, stopWords));
|
||||
} else if (pattern == WHITESPACE_PATTERN) { // fast path
|
||||
return new TokenStreamComponents(new FastStringTokenizer(reader, text, false, toLowerCase, stopWords));
|
||||
}
|
||||
|
||||
Tokenizer tokenizer = new PatternTokenizer(reader, text, pattern, toLowerCase);
|
||||
TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
|
||||
return new TokenStreamComponents(tokenizer, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a token stream that tokenizes all the text in the given Reader;
|
||||
* This implementation forwards to <code>tokenStream(String, Reader, String)</code> and is
|
||||
* less efficient than <code>tokenStream(String, Reader, String)</code>.
|
||||
*
|
||||
* @param fieldName
|
||||
* the name of the field to tokenize (currently ignored).
|
||||
* @param reader
|
||||
* the reader delivering the text
|
||||
* @return a new token stream
|
||||
*/
|
||||
@Override
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
try {
|
||||
String text = toString(reader);
|
||||
return createComponents(fieldName, reader, text);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether some other object is "equal to" this one.
|
||||
*
|
||||
* @param other
|
||||
* the reference object with which to compare.
|
||||
* @return true if equal, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) return true;
|
||||
if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) return false;
|
||||
if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER) return false;
|
||||
|
||||
if (other instanceof PatternAnalyzer) {
|
||||
PatternAnalyzer p2 = (PatternAnalyzer) other;
|
||||
return
|
||||
toLowerCase == p2.toLowerCase &&
|
||||
eqPattern(pattern, p2.pattern) &&
|
||||
eq(stopWords, p2.stopWords);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash code value for the object.
|
||||
*
|
||||
* @return the hash code.
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (this == DEFAULT_ANALYZER) return -1218418418; // fast path
|
||||
if (this == EXTENDED_ANALYZER) return 1303507063; // fast path
|
||||
|
||||
int h = 1;
|
||||
h = 31*h + pattern.pattern().hashCode();
|
||||
h = 31*h + pattern.flags();
|
||||
h = 31*h + (toLowerCase ? 1231 : 1237);
|
||||
h = 31*h + (stopWords != null ? stopWords.hashCode() : 0);
|
||||
return h;
|
||||
}
|
||||
|
||||
/** equality where o1 and/or o2 can be null */
|
||||
private static boolean eq(Object o1, Object o2) {
|
||||
return (o1 == o2) || (o1 != null ? o1.equals(o2) : false);
|
||||
}
|
||||
|
||||
/** assumes p1 and p2 are not null */
|
||||
private static boolean eqPattern(Pattern p1, Pattern p2) {
|
||||
return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().equals(p2.pattern()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads until end-of-stream and returns all read chars, finally closes the stream.
|
||||
*
|
||||
* @param input the input stream
|
||||
* @throws IOException if an I/O error occurs while reading the stream
|
||||
*/
|
||||
private static String toString(Reader input) throws IOException {
|
||||
if (input instanceof FastStringReader) { // fast path
|
||||
return ((FastStringReader) input).getString();
|
||||
}
|
||||
|
||||
try {
|
||||
int len = 256;
|
||||
char[] buffer = new char[len];
|
||||
char[] output = new char[len];
|
||||
|
||||
len = 0;
|
||||
int n;
|
||||
while ((n = input.read(buffer)) >= 0) {
|
||||
if (len + n > output.length) { // grow capacity
|
||||
char[] tmp = new char[Math.max(output.length << 1, len + n)];
|
||||
System.arraycopy(output, 0, tmp, 0, len);
|
||||
System.arraycopy(buffer, 0, tmp, len, n);
|
||||
buffer = output; // use larger buffer for future larger bulk reads
|
||||
output = tmp;
|
||||
} else {
|
||||
System.arraycopy(buffer, 0, output, len, n);
|
||||
}
|
||||
len += n;
|
||||
}
|
||||
|
||||
return new String(output, 0, len);
|
||||
} finally {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Nested classes:
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* The work horse; performance isn't fantastic, but it's not nearly as bad
|
||||
* as one might think - kudos to the Sun regex developers.
|
||||
*/
|
||||
private static final class PatternTokenizer extends Tokenizer {
|
||||
|
||||
private final Pattern pattern;
|
||||
private String str;
|
||||
private final boolean toLowerCase;
|
||||
private Matcher matcher;
|
||||
private int pos = 0;
|
||||
private static final Locale locale = Locale.getDefault();
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
public PatternTokenizer(Reader input, String str, Pattern pattern, boolean toLowerCase) {
|
||||
super(input);
|
||||
this.pattern = pattern;
|
||||
this.str = str;
|
||||
this.matcher = pattern.matcher(str);
|
||||
this.toLowerCase = toLowerCase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() {
|
||||
if (matcher == null) return false;
|
||||
clearAttributes();
|
||||
while (true) { // loop takes care of leading and trailing boundary cases
|
||||
int start = pos;
|
||||
int end;
|
||||
boolean isMatch = matcher.find();
|
||||
if (isMatch) {
|
||||
end = matcher.start();
|
||||
pos = matcher.end();
|
||||
} else {
|
||||
end = str.length();
|
||||
matcher = null; // we're finished
|
||||
}
|
||||
|
||||
if (start != end) { // non-empty match (header/trailer)
|
||||
String text = str.substring(start, end);
|
||||
if (toLowerCase) text = text.toLowerCase(locale);
|
||||
termAtt.setEmpty().append(text);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
|
||||
return true;
|
||||
}
|
||||
if (!isMatch) return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void end() {
|
||||
// set final offset
|
||||
final int finalOffset = correctOffset(str.length());
|
||||
this.offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset(Reader input) throws IOException {
|
||||
super.reset(input);
|
||||
this.str = PatternAnalyzer.toString(input);
|
||||
this.matcher = pattern.matcher(this.str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
this.pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Nested classes:
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Special-case class for best performance in common cases; this class is
|
||||
* otherwise unnecessary.
|
||||
*/
|
||||
private static final class FastStringTokenizer extends Tokenizer {
|
||||
|
||||
private String str;
|
||||
private int pos;
|
||||
private final boolean isLetter;
|
||||
private final boolean toLowerCase;
|
||||
private final CharArraySet stopWords;
|
||||
private static final Locale locale = Locale.getDefault();
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
public FastStringTokenizer(Reader input, String str, boolean isLetter, boolean toLowerCase, CharArraySet stopWords) {
|
||||
super(input);
|
||||
this.str = str;
|
||||
this.isLetter = isLetter;
|
||||
this.toLowerCase = toLowerCase;
|
||||
this.stopWords = stopWords;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() {
|
||||
clearAttributes();
|
||||
// cache loop instance vars (performance)
|
||||
String s = str;
|
||||
int len = s.length();
|
||||
int i = pos;
|
||||
boolean letter = isLetter;
|
||||
|
||||
int start = 0;
|
||||
String text;
|
||||
do {
|
||||
// find beginning of token
|
||||
text = null;
|
||||
while (i < len && !isTokenChar(s.charAt(i), letter)) {
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i < len) { // found beginning; now find end of token
|
||||
start = i;
|
||||
while (i < len && isTokenChar(s.charAt(i), letter)) {
|
||||
i++;
|
||||
}
|
||||
|
||||
text = s.substring(start, i);
|
||||
if (toLowerCase) text = text.toLowerCase(locale);
|
||||
// if (toLowerCase) {
|
||||
//// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
|
||||
//// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
|
||||
// text = s.substring(start, i).toLowerCase();
|
||||
//// char[] chars = new char[i-start];
|
||||
//// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
|
||||
//// text = new String(chars);
|
||||
// } else {
|
||||
// text = s.substring(start, i);
|
||||
// }
|
||||
}
|
||||
} while (text != null && isStopWord(text));
|
||||
|
||||
pos = i;
|
||||
if (text == null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
termAtt.setEmpty().append(text);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(i));
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void end() {
|
||||
// set final offset
|
||||
final int finalOffset = str.length();
|
||||
this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
|
||||
}
|
||||
|
||||
private boolean isTokenChar(char c, boolean isLetter) {
|
||||
return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c);
|
||||
}
|
||||
|
||||
private boolean isStopWord(String text) {
|
||||
return stopWords != null && stopWords.contains(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset(Reader input) throws IOException {
|
||||
super.reset(input);
|
||||
this.str = PatternAnalyzer.toString(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
this.pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Nested classes:
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* A StringReader that exposes its contained string for fast direct access.
|
||||
* Might make sense to generalize this to CharSequence and make it public?
|
||||
*/
|
||||
static final class FastStringReader extends StringReader {
|
||||
|
||||
private final String s;
|
||||
|
||||
FastStringReader(String s) {
|
||||
super(s);
|
||||
this.s = s;
|
||||
}
|
||||
|
||||
String getString() {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
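The deleted FastStringTokenizer boils down to a two-phase scan: skip characters that cannot start a token, then consume the token body, stop-filtering as it goes. A minimal standalone sketch of that scan, assuming plain JDK types and none of the Lucene attribute machinery (FastScanSketch and its names are hypothetical):

import java.util.ArrayList;
import java.util.List;

final class FastScanSketch {
  // The same two-phase scan as FastStringTokenizer.incrementToken(), minus attributes.
  static List<String> scan(String s, boolean letterOnly) {
    List<String> tokens = new ArrayList<String>();
    int i = 0, len = s.length();
    while (i < len) {
      while (i < len && !isTokenChar(s.charAt(i), letterOnly)) i++; // find token start
      int start = i;
      while (i < len && isTokenChar(s.charAt(i), letterOnly)) i++;  // find token end
      if (i > start) tokens.add(s.substring(start, i));
    }
    return tokens;
  }

  static boolean isTokenChar(char c, boolean letterOnly) {
    return letterOnly ? Character.isLetter(c) : !Character.isWhitespace(c);
  }
}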
@@ -35,7 +35,7 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
|
|||
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
// use a fixed version, as we don't care about case sensitivity.
|
||||
private final CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
|
||||
private final CharArraySet previous = new CharArraySet(Version.LUCENE_50, 8, false);
|
||||
|
||||
/**
|
||||
* Creates a new RemoveDuplicatesTokenFilter
|
||||
|
|
|
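Pinning the replacement set to a concrete Version constant is safe here because the set only answers exact lookups on terms the filter has already seen at the current position, so no analysis behavior can leak across versions. The lookup pattern the filter relies on, sketched against the trunk CharArraySet API:

CharArraySet previous = new CharArraySet(Version.LUCENE_50, 8, false);
char[] term = "duplicate".toCharArray();
if (!previous.contains(term, 0, term.length)) {
  previous.add(term); // first occurrence at this position: keep the token
}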
@@ -47,20 +47,6 @@ import java.io.Reader;
|
|||
* A default set of stopwords is used unless an alternative list is specified, but the
|
||||
* exclusion list is empty by default.
|
||||
* </p>
|
||||
*
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating DutchAnalyzer:
|
||||
* <ul>
|
||||
* <li> As of 3.6, {@link #DutchAnalyzer(Version, CharArraySet)} and
|
||||
* {@link #DutchAnalyzer(Version, CharArraySet, CharArraySet)} also populate
|
||||
* the default entries for the stem override dictionary
|
||||
* <li> As of 3.1, Snowball stemming is done with SnowballFilter,
|
||||
* LowerCaseFilter is used prior to StopFilter, and Snowball
|
||||
* stopwords are used by default.
|
||||
* <li> As of 2.9, StopFilter preserves position
|
||||
* increments
|
||||
* </ul>
|
||||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link Version}
|
||||
* dependent settings as {@link StandardAnalyzer}.</p>
|
||||
|
@@ -119,26 +105,15 @@ public final class DutchAnalyzer extends Analyzer {
|
|||
*
|
||||
*/
|
||||
public DutchAnalyzer(Version matchVersion) {
|
||||
// historically, only this ctor populated the stem dict!!!!!
|
||||
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
|
||||
}
|
||||
|
||||
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
|
||||
// historically, this ctor never populated the stem dict!
|
||||
// so we populate it only for >= 3.6
|
||||
this(matchVersion, stopwords, CharArraySet.EMPTY_SET,
|
||||
matchVersion.onOrAfter(Version.LUCENE_36)
|
||||
? DefaultSetHolder.DEFAULT_STEM_DICT
|
||||
: CharArrayMap.<String>emptyMap());
|
||||
this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
|
||||
}
|
||||
|
||||
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
|
||||
// historically, this ctor never populated the stem dict!
|
||||
// so we populate it only for >= 3.6
|
||||
this(matchVersion, stopwords, stemExclusionTable,
|
||||
matchVersion.onOrAfter(Version.LUCENE_36)
|
||||
? DefaultSetHolder.DEFAULT_STEM_DICT
|
||||
: CharArrayMap.<String>emptyMap());
|
||||
this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
|
||||
}
|
||||
|
||||
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
|
||||
|
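With the version check gone, every constructor funnels into the four-argument form with the default stem-override dictionary, so the historical asymmetry the comments describe disappears. A sketch (getDefaultStopSet() is the usual StopwordAnalyzerBase-style accessor, assumed unchanged by this commit):

// Both now populate the default stem-override dictionary:
Analyzer a1 = new DutchAnalyzer(Version.LUCENE_50);
Analyzer a2 = new DutchAnalyzer(Version.LUCENE_50, DutchAnalyzer.getDefaultStopSet());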
@@ -160,25 +135,15 @@ public final class DutchAnalyzer extends Analyzer {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader aReader) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
|
||||
TokenStream result = new StandardFilter(matchVersion, source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stoptable);
|
||||
if (!excltable.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
if (!stemdict.isEmpty())
|
||||
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
} else {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
|
||||
TokenStream result = new StandardFilter(matchVersion, source);
|
||||
result = new StopFilter(matchVersion, result, stoptable);
|
||||
if (!excltable.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
result = new DutchStemFilter(result, stemdict);
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
|
||||
TokenStream result = new StandardFilter(matchVersion, source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stoptable);
|
||||
if (!excltable.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
if (!stemdict.isEmpty())
|
||||
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
||||
|
|
|
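The surviving branch above is the entire chain: StandardTokenizer, StandardFilter, LowerCaseFilter, StopFilter, the optional keyword-marker and stemmer-override stages, then SnowballFilter. Consuming it follows the usual TokenStream contract; a sketch, assuming trunk-era reset/incrementToken/end/close semantics:

Analyzer analyzer = new DutchAnalyzer(Version.LUCENE_50);
TokenStream ts = analyzer.tokenStream("body", new StringReader("katten en honden"));
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  System.out.println(term.toString()); // prints the stemmed, stop-filtered terms
}
ts.end();
ts.close();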
@@ -1,108 +0,0 @@
|
|||
package org.apache.lucene.analysis.nl;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
* A {@link TokenFilter} that stems Dutch words.
|
||||
* <p>
|
||||
* It supports a table of words that should
|
||||
* not be stemmed at all. The stemmer used can be changed at runtime after the
|
||||
* filter object is created (as long as it is a {@link DutchStemmer}).
|
||||
* </p>
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerFilter
|
||||
* @deprecated (3.1) Use {@link SnowballFilter} with
|
||||
* {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
|
||||
* same functionality. This filter will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public final class DutchStemFilter extends TokenFilter {
|
||||
/**
|
||||
* The actual token in the input stream.
|
||||
*/
|
||||
private DutchStemmer stemmer = new DutchStemmer();
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
||||
|
||||
public DutchStemFilter(TokenStream _in) {
|
||||
super(_in);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param stemdictionary Dictionary of word/stem pairs that overrule the algorithm
|
||||
*/
|
||||
public DutchStemFilter(TokenStream _in, Map<?,?> stemdictionary) {
|
||||
this(_in);
|
||||
stemmer.setStemDictionary(stemdictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances to the next token in the stream and returns true, or false at EOS.
|
||||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
final String term = termAtt.toString();
|
||||
|
||||
// Check the exclusion table.
|
||||
if (!keywordAttr.isKeyword()) {
|
||||
final String s = stemmer.stem(term);
|
||||
// If not stemmed, don't waste time adjusting the token.
|
||||
if ((s != null) && !s.equals(term))
|
||||
termAtt.setEmpty().append(s);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an alternative/custom {@link DutchStemmer} for this filter.
|
||||
*/
|
||||
public void setStemmer(DutchStemmer stemmer) {
|
||||
if (stemmer != null) {
|
||||
this.stemmer = stemmer;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a dictionary for stemming; this dictionary overrules the algorithm,
|
||||
* so you can correct for a particular unwanted word-stem pair.
|
||||
*/
|
||||
public void setStemDictionary(HashMap<?,?> dict) {
|
||||
if (stemmer != null)
|
||||
stemmer.setStemDictionary(dict);
|
||||
}
|
||||
}
|
|
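The migration named in the deprecation notice above is mechanical, and the stem-dictionary constructor maps onto StemmerOverrideFilter. A sketch using the filters referenced elsewhere in this commit (input and stemOverrides are stand-ins for the caller's TokenStream and CharArrayMap<String>):

// old: TokenStream result = new DutchStemFilter(input, stemOverrides);
TokenStream result = new StemmerOverrideFilter(Version.LUCENE_50, input, stemOverrides);
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());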
@@ -1,409 +0,0 @@
|
|||
package org.apache.lucene.analysis.nl;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A stemmer for Dutch words.
|
||||
* <p>
|
||||
* The algorithm is an implementation of
|
||||
* the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">Dutch stemming</a>
|
||||
* algorithm in Martin Porter's Snowball project.
|
||||
* </p>
|
||||
* @deprecated (3.1) Use {@link org.tartarus.snowball.ext.DutchStemmer} instead,
|
||||
* which has the same functionality. This class will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class DutchStemmer {
|
||||
/**
|
||||
* Buffer for the terms while stemming them.
|
||||
*/
|
||||
private StringBuilder sb = new StringBuilder();
|
||||
private boolean _removedE;
|
||||
private Map _stemDict;
|
||||
|
||||
private int _R1;
|
||||
private int _R2;
|
||||
|
||||
//TODO convert to internal
|
||||
/*
|
||||
* Stems the given term to a unique <tt>discriminator</tt>.
|
||||
*
|
||||
* @param term The term that should be stemmed.
|
||||
* @return Discriminator for <tt>term</tt>
|
||||
*/
|
||||
public String stem(String term) {
|
||||
term = term.toLowerCase();
|
||||
if (!isStemmable(term))
|
||||
return term;
|
||||
if (_stemDict != null && _stemDict.containsKey(term))
|
||||
if (_stemDict.get(term) instanceof String)
|
||||
return (String) _stemDict.get(term);
|
||||
else
|
||||
return null;
|
||||
|
||||
// Reset the StringBuilder.
|
||||
sb.delete(0, sb.length());
|
||||
sb.insert(0, term);
|
||||
// Stemming starts here...
|
||||
substitute(sb);
|
||||
storeYandI(sb);
|
||||
_R1 = getRIndex(sb, 0);
|
||||
_R1 = Math.max(3, _R1);
|
||||
step1(sb);
|
||||
step2(sb);
|
||||
_R2 = getRIndex(sb, _R1);
|
||||
step3a(sb);
|
||||
step3b(sb);
|
||||
step4(sb);
|
||||
reStoreYandI(sb);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private boolean enEnding(StringBuilder sb) {
|
||||
String[] enend = new String[]{"ene", "en"};
|
||||
for (int i = 0; i < enend.length; i++) {
|
||||
String end = enend[i];
|
||||
String s = sb.toString();
|
||||
int index = s.length() - end.length();
|
||||
if (s.endsWith(end) &&
|
||||
index >= _R1 &&
|
||||
isValidEnEnding(sb, index - 1)
|
||||
) {
|
||||
sb.delete(index, index + end.length());
|
||||
unDouble(sb, index);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
private void step1(StringBuilder sb) {
|
||||
if (_R1 >= sb.length())
|
||||
return;
|
||||
|
||||
String s = sb.toString();
|
||||
int lengthR1 = sb.length() - _R1;
|
||||
int index;
|
||||
|
||||
if (s.endsWith("heden")) {
|
||||
sb.replace(_R1, lengthR1 + _R1, sb.substring(_R1, lengthR1 + _R1).replaceAll("heden", "heid"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (enEnding(sb))
|
||||
return;
|
||||
|
||||
if (s.endsWith("se") &&
|
||||
(index = s.length() - 2) >= _R1 &&
|
||||
isValidSEnding(sb, index - 1)
|
||||
) {
|
||||
sb.delete(index, index + 2);
|
||||
return;
|
||||
}
|
||||
if (s.endsWith("s") &&
|
||||
(index = s.length() - 1) >= _R1 &&
|
||||
isValidSEnding(sb, index - 1)) {
|
||||
sb.delete(index, index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete suffix e if in R1 and
|
||||
* preceded by a non-vowel, and then undouble the ending
|
||||
*
|
||||
* @param sb String being stemmed
|
||||
*/
|
||||
private void step2(StringBuilder sb) {
|
||||
_removedE = false;
|
||||
if (_R1 >= sb.length())
|
||||
return;
|
||||
String s = sb.toString();
|
||||
int index = s.length() - 1;
|
||||
if (index >= _R1 &&
|
||||
s.endsWith("e") &&
|
||||
!isVowel(sb.charAt(index - 1))) {
|
||||
sb.delete(index, index + 1);
|
||||
unDouble(sb);
|
||||
_removedE = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete "heid"
|
||||
*
|
||||
* @param sb String being stemmed
|
||||
*/
|
||||
private void step3a(StringBuilder sb) {
|
||||
if (_R2 >= sb.length())
|
||||
return;
|
||||
String s = sb.toString();
|
||||
int index = s.length() - 4;
|
||||
if (s.endsWith("heid") && index >= _R2 && sb.charAt(index - 1) != 'c') {
|
||||
sb.delete(index, index + 4); //remove heid
|
||||
enEnding(sb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>A d-suffix, or derivational suffix, enables a new word,
|
||||
* often with a different grammatical category, or with a different
|
||||
* sense, to be built from another word. Whether a d-suffix can be
|
||||
* attached is discovered not from the rules of grammar, but by
|
||||
* referring to a dictionary. So in English, ness can be added to
|
||||
* certain adjectives to form corresponding nouns (littleness,
|
||||
* kindness, foolishness ...) but not to all adjectives
|
||||
* (not for example, to big, cruel, wise ...) d-suffixes can be
|
||||
* used to change meaning, often in rather exotic ways.</p>
|
||||
* Remove "ing", "end", "ig", "lijk", "baar" and "bar"
|
||||
*
|
||||
* @param sb String being stemmed
|
||||
*/
|
||||
private void step3b(StringBuilder sb) {
|
||||
if (_R2 >= sb.length())
|
||||
return;
|
||||
String s = sb.toString();
|
||||
int index = 0;
|
||||
|
||||
if ((s.endsWith("end") || s.endsWith("ing")) &&
|
||||
(index = s.length() - 3) >= _R2) {
|
||||
sb.delete(index, index + 3);
|
||||
if (sb.charAt(index - 2) == 'i' &&
|
||||
sb.charAt(index - 1) == 'g') {
|
||||
if (sb.charAt(index - 3) != 'e' && index - 2 >= _R2) {
|
||||
index -= 2;
|
||||
sb.delete(index, index + 2);
|
||||
}
|
||||
} else {
|
||||
unDouble(sb, index);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (s.endsWith("ig") &&
|
||||
(index = s.length() - 2) >= _R2
|
||||
) {
|
||||
if (sb.charAt(index - 1) != 'e')
|
||||
sb.delete(index, index + 2);
|
||||
return;
|
||||
}
|
||||
if (s.endsWith("lijk") &&
|
||||
(index = s.length() - 4) >= _R2
|
||||
) {
|
||||
sb.delete(index, index + 4);
|
||||
step2(sb);
|
||||
return;
|
||||
}
|
||||
if (s.endsWith("baar") &&
|
||||
(index = s.length() - 4) >= _R2
|
||||
) {
|
||||
sb.delete(index, index + 4);
|
||||
return;
|
||||
}
|
||||
if (s.endsWith("bar") &&
|
||||
(index = s.length() - 3) >= _R2
|
||||
) {
|
||||
if (_removedE)
|
||||
sb.delete(index, index + 3);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* undouble vowel
|
||||
* If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is a doubled a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
|
||||
*
|
||||
* @param sb String being stemmed
|
||||
*/
|
||||
private void step4(StringBuilder sb) {
|
||||
if (sb.length() < 4)
|
||||
return;
|
||||
String end = sb.substring(sb.length() - 4, sb.length());
|
||||
char c = end.charAt(0);
|
||||
char v1 = end.charAt(1);
|
||||
char v2 = end.charAt(2);
|
||||
char d = end.charAt(3);
|
||||
if (v1 == v2 &&
|
||||
d != 'I' &&
|
||||
v1 != 'i' &&
|
||||
isVowel(v1) &&
|
||||
!isVowel(d) &&
|
||||
!isVowel(c)) {
|
||||
sb.delete(sb.length() - 2, sb.length() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a term could be stemmed.
|
||||
*
|
||||
* @return true if, and only if, the given term consists only of letters.
|
||||
*/
|
||||
private boolean isStemmable(String term) {
|
||||
for (int c = 0; c < term.length(); c++) {
|
||||
if (!Character.isLetter(term.charAt(c))) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú
|
||||
*/
|
||||
private void substitute(StringBuilder buffer) {
|
||||
for (int i = 0; i < buffer.length(); i++) {
|
||||
switch (buffer.charAt(i)) {
|
||||
case 'ä':
|
||||
case 'á':
|
||||
{
|
||||
buffer.setCharAt(i, 'a');
|
||||
break;
|
||||
}
|
||||
case 'ë':
|
||||
case 'é':
|
||||
{
|
||||
buffer.setCharAt(i, 'e');
|
||||
break;
|
||||
}
|
||||
case 'ü':
|
||||
case 'ú':
|
||||
{
|
||||
buffer.setCharAt(i, 'u');
|
||||
break;
|
||||
}
|
||||
case 'ï':
|
||||
case 'í':
|
||||
{
|
||||
buffer.setCharAt(i, 'i');
|
||||
break;
|
||||
}
|
||||
case 'ö':
|
||||
case 'ó':
|
||||
{
|
||||
buffer.setCharAt(i, 'o');
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*private boolean isValidSEnding(StringBuilder sb) {
|
||||
return isValidSEnding(sb, sb.length() - 1);
|
||||
}*/
|
||||
|
||||
private boolean isValidSEnding(StringBuilder sb, int index) {
|
||||
char c = sb.charAt(index);
|
||||
if (isVowel(c) || c == 'j')
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*private boolean isValidEnEnding(StringBuilder sb) {
|
||||
return isValidEnEnding(sb, sb.length() - 1);
|
||||
}*/
|
||||
|
||||
private boolean isValidEnEnding(StringBuilder sb, int index) {
|
||||
char c = sb.charAt(index);
|
||||
if (isVowel(c))
|
||||
return false;
|
||||
if (index < 2) // need look-behind for the "gem" check below
|
||||
return false;
|
||||
// ends with "gem"?
|
||||
if (c == 'm' && sb.charAt(index - 2) == 'g' && sb.charAt(index - 1) == 'e')
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private void unDouble(StringBuilder sb) {
|
||||
unDouble(sb, sb.length());
|
||||
}
|
||||
|
||||
private void unDouble(StringBuilder sb, int endIndex) {
|
||||
String s = sb.substring(0, endIndex);
|
||||
if (s.endsWith("kk") || s.endsWith("tt") || s.endsWith("dd") || s.endsWith("nn") || s.endsWith("mm") || s.endsWith("ff")) {
|
||||
sb.delete(endIndex - 1, endIndex);
|
||||
}
|
||||
}
|
||||
|
||||
private int getRIndex(StringBuilder sb, int start) {
|
||||
if (start == 0)
|
||||
start = 1;
|
||||
int i = start;
|
||||
for (; i < sb.length(); i++) {
|
||||
//first non-vowel preceded by a vowel
|
||||
if (!isVowel(sb.charAt(i)) && isVowel(sb.charAt(i - 1))) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
private void storeYandI(StringBuilder sb) {
|
||||
if (sb.charAt(0) == 'y')
|
||||
sb.setCharAt(0, 'Y');
|
||||
|
||||
int last = sb.length() - 1;
|
||||
|
||||
for (int i = 1; i < last; i++) {
|
||||
switch (sb.charAt(i)) {
|
||||
case 'i':
|
||||
{
|
||||
if (isVowel(sb.charAt(i - 1)) &&
|
||||
isVowel(sb.charAt(i + 1))
|
||||
)
|
||||
sb.setCharAt(i, 'I');
|
||||
break;
|
||||
}
|
||||
case 'y':
|
||||
{
|
||||
if (isVowel(sb.charAt(i - 1)))
|
||||
sb.setCharAt(i, 'Y');
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (last > 0 && sb.charAt(last) == 'y' && isVowel(sb.charAt(last - 1)))
|
||||
sb.setCharAt(last, 'Y');
|
||||
}
|
||||
|
||||
private void reStoreYandI(StringBuilder sb) {
|
||||
String tmp = sb.toString();
|
||||
sb.delete(0, sb.length());
|
||||
sb.insert(0, tmp.replaceAll("I", "i").replaceAll("Y", "y"));
|
||||
}
|
||||
|
||||
private boolean isVowel(char c) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
case 'a':
|
||||
case 'o':
|
||||
case 'i':
|
||||
case 'u':
|
||||
case 'y':
|
||||
case 'è':
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void setStemDictionary(Map dict) {
|
||||
_stemDict = dict;
|
||||
}
|
||||
|
||||
}
|
|
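Callers who used DutchStemmer.stem(String) directly can drive the Snowball replacement through SnowballProgram's mutable-current API instead of a one-shot call. A sketch, assuming the org.tartarus.snowball contract:

org.tartarus.snowball.ext.DutchStemmer stemmer = new org.tartarus.snowball.ext.DutchStemmer();
stemmer.setCurrent("lichamelijkheden");
stemmer.stem();                     // mutates the internal buffer
String stem = stemmer.getCurrent(); // the stemmed form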
@@ -49,8 +49,6 @@ import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
|||
* @since Solr 1.5
|
||||
*/
|
||||
public class PatternReplaceCharFilter extends BaseCharFilter {
|
||||
@Deprecated
|
||||
public static final int DEFAULT_MAX_BLOCK_CHARS = 10000;
|
||||
|
||||
private final Pattern pattern;
|
||||
private final String replacement;
|
||||
|
@@ -62,12 +60,6 @@ public class PatternReplaceCharFilter extends BaseCharFilter {
|
|||
this.replacement = replacement;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public PatternReplaceCharFilter(Pattern pattern, String replacement,
|
||||
int maxBlockChars, String blockDelimiter, CharStream in) {
|
||||
this(pattern, replacement, in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws IOException {
|
||||
// Buffer all input on the first call.
|
||||
|
|
|
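The surviving constructor drops the block parameters, which the class stopped honoring once it started buffering all input on the first read. A usage sketch, assuming the CharStream/CharReader pairing still present on trunk at this point:

Pattern p = Pattern.compile("(\\d+)-(\\d+)");
CharStream in = CharReader.get(new StringReader("call 555-1234"));
Reader filtered = new PatternReplaceCharFilter(p, "$1$2", in); // reads back "call 5551234"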
@@ -34,17 +34,9 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
|||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.tartarus.snowball.ext.PortugueseStemmer;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Portuguese.
|
||||
* <p>
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating PortugueseAnalyzer:
|
||||
* <ul>
|
||||
* <li> As of 3.6, PortugueseLightStemFilter is used for less aggressive stemming.
|
||||
* </ul>
|
||||
*/
|
||||
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
@@ -132,11 +124,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_36)) {
|
||||
result = new PortugueseLightStemFilter(result);
|
||||
} else {
|
||||
result = new SnowballFilter(result, new PortugueseStemmer());
|
||||
}
|
||||
result = new PortugueseLightStemFilter(result);
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
||||
|
|
|
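After this change the light stemmer is unconditional. Anyone who still wants the aggressive Snowball behavior has to assemble the chain by hand; a sketch, where stopped stands in for the upstream stop-filtered TokenStream:

// Default chain now always ends in PortugueseLightStemFilter:
Analyzer pt = new PortugueseAnalyzer(Version.LUCENE_50);

// Hand-built Snowball variant:
TokenStream aggressive = new SnowballFilter(stopped, new PortugueseStemmer());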
@@ -134,7 +134,7 @@ public abstract class RSLPStemmerBase {
|
|||
if (!exceptions[i].endsWith(suffix))
|
||||
System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
|
||||
}
|
||||
this.exceptions = new CharArraySet(Version.LUCENE_31,
|
||||
this.exceptions = new CharArraySet(Version.LUCENE_50,
|
||||
Arrays.asList(exceptions), false);
|
||||
}
|
||||
|
||||
|
|
|
@@ -31,14 +31,6 @@ import java.io.IOException;
|
|||
* that character. For example, with a marker of \u0001, "country" =>
|
||||
* "\u0001yrtnuoc". This is useful when implementing efficient leading
|
||||
* wildcards search.
|
||||
* </p>
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating ReverseStringFilter, or when using any of
|
||||
* its static methods:
|
||||
* <ul>
|
||||
* <li> As of 3.1, supplementary characters are handled correctly
|
||||
* </ul>
|
||||
*/
|
||||
public final class ReverseStringFilter extends TokenFilter {
|
||||
|
||||
|
@@ -74,7 +66,7 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
* The reversed tokens will not be marked.
|
||||
* </p>
|
||||
*
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion Lucene compatibility version
|
||||
* @param in {@link TokenStream} to filter
|
||||
*/
|
||||
public ReverseStringFilter(Version matchVersion, TokenStream in) {
|
||||
|
@@ -89,7 +81,7 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
* character.
|
||||
* </p>
|
||||
*
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion compatibility version
|
||||
* @param in {@link TokenStream} to filter
|
||||
* @param marker A character used to mark reversed tokens
|
||||
*/
|
||||
|
@@ -119,7 +111,7 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
/**
|
||||
* Reverses the given input string
|
||||
*
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion compatibility version
|
||||
* @param input the string to reverse
|
||||
* @return the given input string in reversed order
|
||||
*/
|
||||
|
@@ -131,7 +123,7 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* Reverses the given input buffer in-place
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion compatibility version
|
||||
* @param buffer the input char array to reverse
|
||||
*/
|
||||
public static void reverse(Version matchVersion, final char[] buffer) {
|
||||
|
@@ -141,7 +133,7 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
/**
|
||||
* Partially reverses the given input buffer in-place from offset 0
|
||||
* up to the given length.
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion compatibility version
|
||||
* @param buffer the input char array to reverse
|
||||
* @param len the length in the buffer up to where the
|
||||
* buffer should be reversed
|
||||
|
@@ -151,24 +143,10 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
reverse( matchVersion, buffer, 0, len );
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated (3.1) Remove this when support for 3.0 indexes is no longer needed.
|
||||
*/
|
||||
@Deprecated
|
||||
private static void reverseUnicode3( char[] buffer, int start, int len ){
|
||||
if( len <= 1 ) return;
|
||||
int num = len>>1;
|
||||
for( int i = start; i < ( start + num ); i++ ){
|
||||
char c = buffer[i];
|
||||
buffer[i] = buffer[start * 2 + len - i - 1];
|
||||
buffer[start * 2 + len - i - 1] = c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Partially reverses the given input buffer in-place from the given offset
|
||||
* up to the given length.
|
||||
* @param matchVersion See <a href="#version">above</a>
|
||||
* @param matchVersion compatibility version
|
||||
* @param buffer the input char array to reverse
|
||||
* @param start the offset from where to reverse the buffer
|
||||
* @param len the length in the buffer up to where the
|
||||
|
@@ -176,10 +154,6 @@ public final class ReverseStringFilter extends TokenFilter {
|
|||
*/
|
||||
public static void reverse(Version matchVersion, final char[] buffer,
|
||||
final int start, final int len) {
|
||||
if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||
reverseUnicode3(buffer, start, len);
|
||||
return;
|
||||
}
|
||||
/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
|
||||
if (len < 2)
|
||||
return;
|
||||
|
|
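With reverseUnicode3 removed above, the static helpers always take the supplementary-character-aware path. The in-place helper in action, matching the marker example from the class javadoc:

char[] buf = "country".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_50, buf);
// buf now holds "yrtnuoc"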
|
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ru;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
|
@@ -42,44 +41,13 @@ import org.apache.lucene.util.Version;
|
|||
* Supports an external list of stopwords (words that
|
||||
* will not be indexed at all).
|
||||
* A default set of stopwords is used unless an alternative list is specified.
|
||||
* </p>
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating RussianAnalyzer:
|
||||
* <ul>
|
||||
* <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
|
||||
* SnowballFilter, and Snowball stopwords are used by default.
|
||||
* </ul>
|
||||
*/
|
||||
public final class RussianAnalyzer extends StopwordAnalyzerBase
|
||||
{
|
||||
/**
|
||||
* List of typical Russian stopwords. (for backwards compatibility)
|
||||
* @deprecated (3.1) Remove this for LUCENE 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String[] RUSSIAN_STOP_WORDS_30 = {
|
||||
"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
|
||||
"вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
|
||||
"да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть",
|
||||
"еще", "же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как",
|
||||
"ко", "когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо",
|
||||
"наш", "не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об",
|
||||
"однако", "он", "она", "они", "оно", "от", "очень", "по", "под", "при",
|
||||
"с", "со", "так", "также", "такой", "там", "те", "тем", "то", "того",
|
||||
"тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей",
|
||||
"чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
|
||||
};
|
||||
public final class RussianAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
/** File containing default Russian stopwords. */
|
||||
public final static String DEFAULT_STOPWORD_FILE = "russian_stop.txt";
|
||||
|
||||
private static class DefaultSetHolder {
|
||||
/** @deprecated (3.1) remove this for Lucene 5.0 */
|
||||
@Deprecated
|
||||
static final CharArraySet DEFAULT_STOP_SET_30 = CharArraySet
|
||||
.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
|
||||
Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
|
||||
static final CharArraySet DEFAULT_STOP_SET;
|
||||
|
||||
static {
|
||||
|
@@ -106,9 +74,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
|||
}
|
||||
|
||||
public RussianAnalyzer(Version matchVersion) {
|
||||
this(matchVersion,
|
||||
matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
|
||||
: DefaultSetHolder.DEFAULT_STOP_SET_30);
|
||||
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -151,23 +117,13 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(matchVersion, source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||
result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
} else {
|
||||
final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
|
||||
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||
result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(matchVersion, source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if (!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
||||
|
|
|
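The inlined RUSSIAN_STOP_WORDS_30 array is gone, so the Snowball stop set loaded from russian_stop.txt is the only default left. A sketch, assuming the usual getDefaultStopSet() accessor:

CharArraySet stops = RussianAnalyzer.getDefaultStopSet();
Analyzer ru = new RussianAnalyzer(Version.LUCENE_50, stops);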
@@ -1,97 +0,0 @@
|
|||
package org.apache.lucene.analysis.ru;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Reader;
|
||||
import org.apache.lucene.analysis.Tokenizer; // for javadocs
|
||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer}
|
||||
* by also allowing the basic Latin digits 0-9.
|
||||
* <p>
|
||||
* <a name="version"/>
|
||||
* You must specify the required {@link Version} compatibility when creating
|
||||
* {@link RussianLetterTokenizer}:
|
||||
* <ul>
|
||||
* <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
|
||||
* detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
|
||||
* {@link CharTokenizer#normalize(int)} for details.</li>
|
||||
* </ul>
|
||||
* @deprecated (3.1) Use {@link StandardTokenizer} instead, which has the same functionality.
|
||||
* This tokenizer will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class RussianLetterTokenizer extends CharTokenizer
|
||||
{
|
||||
private static final int DIGIT_0 = '0';
|
||||
private static final int DIGIT_9 = '9';
|
||||
|
||||
/**
|
||||
* Construct a new RussianLetterTokenizer.
|
||||
* @param matchVersion Lucene version to match; see <a href="#version">above</a>
|
||||
*
|
||||
* @param in
|
||||
* the input to split up into tokens
|
||||
*/
|
||||
public RussianLetterTokenizer(Version matchVersion, Reader in) {
|
||||
super(matchVersion, in);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new RussianLetterTokenizer using a given {@link AttributeSource}.
|
||||
*
|
||||
* @param matchVersion
|
||||
* Lucene version to match; see <a href="#version">above</a>
|
||||
* @param source
|
||||
* the attribute source to use for this {@link Tokenizer}
|
||||
* @param in
|
||||
* the input to split up into tokens
|
||||
*/
|
||||
public RussianLetterTokenizer(Version matchVersion, AttributeSource source, Reader in) {
|
||||
super(matchVersion, source, in);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new RussianLetterTokenizer using a given
|
||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
||||
* @param matchVersion Lucene version to match; see
|
||||
* <a href="#version">above</a>
|
||||
*
|
||||
* @param factory
|
||||
* the attribute factory to use for this {@link Tokenizer}
|
||||
* @param in
|
||||
* the input to split up into tokens
|
||||
*/
|
||||
public RussianLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
|
||||
super(matchVersion, factory, in);
|
||||
}
|
||||
|
||||
/**
|
||||
* Collects only characters which satisfy
|
||||
* {@link Character#isLetter(int)}, plus the basic Latin digits 0-9.
|
||||
*/
|
||||
@Override
|
||||
protected boolean isTokenChar(int c) {
|
||||
return Character.isLetter(c) || (c >= DIGIT_0 && c <= DIGIT_9);
|
||||
}
|
||||
}
|
|
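The tokenizer's only behavioral difference from LetterTokenizer was the predicate above, which also admits the ASCII digits. In isolation, as a standalone sketch:

static boolean isRussianTokenChar(int c) {
  return Character.isLetter(c) || (c >= '0' && c <= '9');
}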
@@ -1,88 +0,0 @@
|
|||
package org.apache.lucene.analysis.snowball;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
||||
import org.apache.lucene.analysis.standard.*;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
|
||||
* LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
|
||||
*
|
||||
* Available stemmers are listed in org.tartarus.snowball.ext. The name of a
|
||||
* stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
|
||||
* {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
|
||||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link Version}
|
||||
* dependent settings as {@link StandardAnalyzer}, with the following addition:
|
||||
* <ul>
|
||||
* <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
|
||||
* </ul>
|
||||
* </p>
|
||||
* @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead.
|
||||
* This analyzer will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public final class SnowballAnalyzer extends Analyzer {
|
||||
private String name;
|
||||
private CharArraySet stopSet;
|
||||
private final Version matchVersion;
|
||||
|
||||
/** Builds the named analyzer with no stop words. */
|
||||
public SnowballAnalyzer(Version matchVersion, String name) {
|
||||
this.name = name;
|
||||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
/** Builds the named analyzer with the given stop words. */
|
||||
public SnowballAnalyzer(Version matchVersion, String name, CharArraySet stopWords) {
|
||||
this(matchVersion, name);
|
||||
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
|
||||
stopWords));
|
||||
}
|
||||
|
||||
/** Constructs a {@link StandardTokenizer} filtered by a {@link
|
||||
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
|
||||
and a {@link SnowballFilter} */
|
||||
@Override
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(matchVersion, tokenizer);
|
||||
// remove the possessive 's for English stemmers
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31) &&
|
||||
(name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
|
||||
result = new EnglishPossessiveFilter(result);
|
||||
// Use a special lowercase filter for Turkish; the stemmer expects it.
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
|
||||
result = new TurkishLowerCaseFilter(result);
|
||||
else
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
if (stopSet != null)
|
||||
result = new StopFilter(matchVersion,
|
||||
result, stopSet);
|
||||
result = new SnowballFilter(result, name);
|
||||
return new TokenStreamComponents(tokenizer, result);
|
||||
}
|
||||
}
|
|
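The deprecation points at the per-language analyzers, which bundle the same possessive, lower-casing, and Turkish special-casing shown above. A migration sketch for the English case (stopWords stands in for the caller's CharArraySet):

// old: Analyzer a = new SnowballAnalyzer(Version.LUCENE_31, "English", stopWords);
Analyzer a = new org.apache.lucene.analysis.en.EnglishAnalyzer(Version.LUCENE_50, stopWords);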
@@ -21,61 +21,19 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Normalizes tokens extracted with {@link StandardTokenizer}.
|
||||
*/
|
||||
public class StandardFilter extends TokenFilter {
|
||||
private final Version matchVersion;
|
||||
|
||||
public StandardFilter(Version matchVersion, TokenStream in) {
|
||||
super(in);
|
||||
this.matchVersion = matchVersion;
|
||||
}
|
||||
|
||||
private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
|
||||
private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
|
||||
|
||||
// this filter uses the type attribute
|
||||
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31))
|
||||
return input.incrementToken(); // TODO: add some niceties for the new grammar
|
||||
else
|
||||
return incrementTokenClassic();
|
||||
}
|
||||
|
||||
public final boolean incrementTokenClassic() throws IOException {
|
||||
if (!input.incrementToken()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final char[] buffer = termAtt.buffer();
|
||||
final int bufferLength = termAtt.length();
|
||||
final String type = typeAtt.type();
|
||||
|
||||
if (type == APOSTROPHE_TYPE && // remove 's
|
||||
bufferLength >= 2 &&
|
||||
buffer[bufferLength-2] == '\'' &&
|
||||
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
|
||||
// Strip last 2 characters off
|
||||
termAtt.setLength(bufferLength - 2);
|
||||
} else if (type == ACRONYM_TYPE) { // remove dots
|
||||
int upto = 0;
|
||||
for(int i=0;i<bufferLength;i++) {
|
||||
char c = buffer[i];
|
||||
if (c != '.')
|
||||
buffer[upto++] = c;
|
||||
}
|
||||
termAtt.setLength(upto);
|
||||
}
|
||||
|
||||
return true;
|
||||
return input.incrementToken(); // TODO: add some niceties for the new grammar
|
||||
}
|
||||
}
|
||||
|
|
|
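The deleted classic path only ever did two things: strip a trailing possessive from <APOSTROPHE> tokens and strip dots from <ACRONYM> tokens. The same logic on plain strings, as a standalone sketch with the ClassicTokenizer type constants written out:

static String normalizeClassic(String term, String type) {
  if ("<APOSTROPHE>".equals(type)
      && (term.endsWith("'s") || term.endsWith("'S"))) {
    return term.substring(0, term.length() - 2); // "lucene's" -> "lucene"
  }
  if ("<ACRONYM>".equals(type)) {
    return term.replace(".", "");                // "u.s.a." -> "usa"
  }
  return term;
}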
@@ -21,7 +21,6 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
@@ -31,31 +30,20 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
/** A grammar-based tokenizer constructed with JFlex.
|
||||
* <p>
|
||||
* As of Lucene version 3.1, this class implements the Word Break rules from the
|
||||
* This class implements the Word Break rules from the
|
||||
* Unicode Text Segmentation algorithm, as specified in
|
||||
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
|
||||
* <p/>
|
||||
* <p>Many applications have specific tokenizer needs. If this tokenizer does
|
||||
* not suit your application, please consider copying this source code
|
||||
* directory to your project and maintaining your own grammar-based tokenizer.
|
||||
*
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating StandardTokenizer:
|
||||
* <ul>
|
||||
* <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
|
||||
* from their combining characters. If you use a previous version number,
|
||||
* you get the exact broken behavior for backwards compatibility.
|
||||
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
|
||||
* If you use a previous version number, you get the exact behavior of
|
||||
* {@link ClassicTokenizer} for backwards compatibility.
|
||||
* </ul>
|
||||
*/
|
||||
|
||||
public final class StandardTokenizer extends Tokenizer {
|
||||
/** A private instance of the JFlex-constructed scanner */
|
||||
private StandardTokenizerInterface scanner;
|
||||
|
||||
// TODO: how can we remove these old types?!
|
||||
public static final int ALPHANUM = 0;
|
||||
/** @deprecated (3.1) */
|
||||
@Deprecated
|
||||
|
@@ -146,13 +134,7 @@ public final class StandardTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
private final void init(Version matchVersion) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_34)) {
|
||||
this.scanner = new StandardTokenizerImpl(input);
|
||||
} else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||
this.scanner = new StandardTokenizerImpl31(input);
|
||||
} else {
|
||||
this.scanner = new ClassicTokenizerImpl(input);
|
||||
}
|
||||
this.scanner = new StandardTokenizerImpl(input);
|
||||
}
|
||||
|
||||
// this tokenizer generates three attributes:
|
||||
|
@@ -184,15 +166,7 @@ public final class StandardTokenizer extends Tokenizer {
|
|||
scanner.getText(termAtt);
|
||||
final int start = scanner.yychar();
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
|
||||
// This 'if' should be removed in the next release. For now, it converts
|
||||
// invalid acronyms to HOST. When removed, only the 'else' part should
|
||||
// remain.
|
||||
if (tokenType == StandardTokenizer.ACRONYM_DEP) {
|
||||
typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
|
||||
termAtt.setLength(termAtt.length() - 1); // remove extra '.'
|
||||
} else {
|
||||
typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
|
||||
}
|
||||
typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
|
||||
return true;
|
||||
} else
|
||||
// When we skip a too-long term, we still increment the
|
||||
|
|
|
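init() now unconditionally instantiates the current JFlex scanner, so matchVersion no longer selects a grammar; construction is unchanged for callers. A sketch:

Tokenizer tok = new StandardTokenizer(Version.LUCENE_50, new StringReader("Lucene 4 trunk"));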
@@ -23,8 +23,6 @@ import java.io.InputStreamReader;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
|
||||
import org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
@@ -50,14 +48,6 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
|
||||
* <li><HIRAGANA>: A single hiragana character</li>
|
||||
* </ul>
|
||||
* <a name="version"/>
|
||||
* <p>You must specify the required {@link Version}
|
||||
* compatibility when creating UAX29URLEmailTokenizer:
|
||||
* <ul>
|
||||
* <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
|
||||
* from their combining characters. If you use a previous version number,
|
||||
* you get the exact broken behavior for backwards compatibility.
|
||||
* </ul>
|
||||
*/
|
||||
|
||||
public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||
|
@@ -128,13 +118,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_36)) {
|
||||
return new UAX29URLEmailTokenizerImpl(input);
|
||||
} else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
|
||||
return new UAX29URLEmailTokenizerImpl34(input);
|
||||
} else {
|
||||
return new UAX29URLEmailTokenizerImpl31(input);
|
||||
}
|
||||
return new UAX29URLEmailTokenizerImpl(input);
|
||||
}
|
||||
|
||||
// this tokenizer generates three attributes:
|
||||
|
|
|
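getScannerFor collapses the same way: one generated scanner, no per-version implementations. A usage sketch:

Tokenizer tok = new UAX29URLEmailTokenizer(Version.LUCENE_50,
    new StringReader("mail dev@lucene.apache.org or visit http://lucene.apache.org"));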
@@ -1,330 +0,0 @@
|
|||
/*
|
||||
* Copyright 2001-2005 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
||||
// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
|
||||
// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
|
||||
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
||||
|
||||
ASCIITLD = "." (
|
||||
[aA][cC]
|
||||
| [aA][dD]
|
||||
| [aA][eE]
|
||||
| [aA][eE][rR][oO]
|
||||
| [aA][fF]
|
||||
| [aA][gG]
|
||||
| [aA][iI]
|
||||
| [aA][lL]
|
||||
| [aA][mM]
|
||||
| [aA][nN]
|
||||
| [aA][oO]
|
||||
| [aA][qQ]
|
||||
| [aA][rR]
|
||||
| [aA][rR][pP][aA]
|
||||
| [aA][sS]
|
||||
| [aA][sS][iI][aA]
|
||||
| [aA][tT]
|
||||
| [aA][uU]
|
||||
| [aA][wW]
|
||||
| [aA][xX]
|
||||
| [aA][zZ]
|
||||
| [bB][aA]
|
||||
| [bB][bB]
|
||||
| [bB][dD]
|
||||
| [bB][eE]
|
||||
| [bB][fF]
|
||||
| [bB][gG]
|
||||
| [bB][hH]
|
||||
| [bB][iI]
|
||||
| [bB][iI][zZ]
|
||||
| [bB][jJ]
|
||||
| [bB][mM]
|
||||
| [bB][nN]
|
||||
| [bB][oO]
|
||||
| [bB][rR]
|
||||
| [bB][sS]
|
||||
| [bB][tT]
|
||||
| [bB][vV]
|
||||
| [bB][wW]
|
||||
| [bB][yY]
|
||||
| [bB][zZ]
|
||||
| [cC][aA]
|
||||
| [cC][aA][tT]
|
||||
| [cC][cC]
|
||||
| [cC][dD]
|
||||
| [cC][fF]
|
||||
| [cC][gG]
|
||||
| [cC][hH]
|
||||
| [cC][iI]
|
||||
| [cC][kK]
|
||||
| [cC][lL]
|
||||
| [cC][mM]
|
||||
| [cC][nN]
|
||||
| [cC][oO]
|
||||
| [cC][oO][mM]
|
||||
| [cC][oO][oO][pP]
|
||||
| [cC][rR]
|
||||
| [cC][uU]
|
||||
| [cC][vV]
|
||||
| [cC][xX]
|
||||
| [cC][yY]
|
||||
| [cC][zZ]
|
||||
| [dD][eE]
|
||||
| [dD][jJ]
|
||||
| [dD][kK]
|
||||
| [dD][mM]
|
||||
| [dD][oO]
|
||||
| [dD][zZ]
|
||||
| [eE][cC]
|
||||
| [eE][dD][uU]
|
||||
| [eE][eE]
|
||||
| [eE][gG]
|
||||
| [eE][rR]
|
||||
| [eE][sS]
|
||||
| [eE][tT]
|
||||
| [eE][uU]
|
||||
| [fF][iI]
|
||||
| [fF][jJ]
|
||||
| [fF][kK]
|
||||
| [fF][mM]
|
||||
| [fF][oO]
|
||||
| [fF][rR]
|
||||
| [gG][aA]
|
||||
| [gG][bB]
|
||||
| [gG][dD]
|
||||
| [gG][eE]
|
||||
| [gG][fF]
|
||||
| [gG][gG]
|
||||
| [gG][hH]
|
||||
| [gG][iI]
|
||||
| [gG][lL]
|
||||
| [gG][mM]
|
||||
| [gG][nN]
|
||||
| [gG][oO][vV]
|
||||
| [gG][pP]
|
||||
| [gG][qQ]
|
||||
| [gG][rR]
|
||||
| [gG][sS]
|
||||
| [gG][tT]
|
||||
| [gG][uU]
|
||||
| [gG][wW]
|
||||
| [gG][yY]
|
||||
| [hH][kK]
|
||||
| [hH][mM]
|
||||
| [hH][nN]
|
||||
| [hH][rR]
|
||||
| [hH][tT]
|
||||
| [hH][uU]
|
||||
| [iI][dD]
|
||||
| [iI][eE]
|
||||
| [iI][lL]
|
||||
| [iI][mM]
|
||||
| [iI][nN]
|
||||
| [iI][nN][fF][oO]
|
||||
| [iI][nN][tT]
|
||||
| [iI][oO]
|
||||
| [iI][qQ]
|
||||
| [iI][rR]
|
||||
| [iI][sS]
|
||||
| [iI][tT]
|
||||
| [jJ][eE]
|
||||
| [jJ][mM]
|
||||
| [jJ][oO]
|
||||
| [jJ][oO][bB][sS]
|
||||
| [jJ][pP]
|
||||
| [kK][eE]
|
||||
| [kK][gG]
|
||||
| [kK][hH]
|
||||
| [kK][iI]
|
||||
| [kK][mM]
|
||||
| [kK][nN]
|
||||
| [kK][pP]
|
||||
| [kK][rR]
|
||||
| [kK][wW]
|
||||
| [kK][yY]
|
||||
| [kK][zZ]
|
||||
| [lL][aA]
|
||||
| [lL][bB]
|
||||
| [lL][cC]
|
||||
| [lL][iI]
|
||||
| [lL][kK]
|
||||
| [lL][rR]
|
||||
| [lL][sS]
|
||||
| [lL][tT]
|
||||
| [lL][uU]
|
||||
| [lL][vV]
|
||||
| [lL][yY]
|
||||
| [mM][aA]
|
||||
| [mM][cC]
|
||||
| [mM][dD]
|
||||
| [mM][eE]
|
||||
| [mM][gG]
|
||||
| [mM][hH]
|
||||
| [mM][iI][lL]
|
||||
| [mM][kK]
|
||||
| [mM][lL]
|
||||
| [mM][mM]
|
||||
| [mM][nN]
|
||||
| [mM][oO]
|
    | [mM][oO][bB][iI]
    | [mM][pP]
    | [mM][qQ]
    | [mM][rR]
    | [mM][sS]
    | [mM][tT]
    | [mM][uU]
    | [mM][uU][sS][eE][uU][mM]
    | [mM][vV]
    | [mM][wW]
    | [mM][xX]
    | [mM][yY]
    | [mM][zZ]
    | [nN][aA]
    | [nN][aA][mM][eE]
    | [nN][cC]
    | [nN][eE]
    | [nN][eE][tT]
    | [nN][fF]
    | [nN][gG]
    | [nN][iI]
    | [nN][lL]
    | [nN][oO]
    | [nN][pP]
    | [nN][rR]
    | [nN][uU]
    | [nN][zZ]
    | [oO][mM]
    | [oO][rR][gG]
    | [pP][aA]
    | [pP][eE]
    | [pP][fF]
    | [pP][gG]
    | [pP][hH]
    | [pP][kK]
    | [pP][lL]
    | [pP][mM]
    | [pP][nN]
    | [pP][rR]
    | [pP][rR][oO]
    | [pP][sS]
    | [pP][tT]
    | [pP][wW]
    | [pP][yY]
    | [qQ][aA]
    | [rR][eE]
    | [rR][oO]
    | [rR][sS]
    | [rR][uU]
    | [rR][wW]
    | [sS][aA]
    | [sS][bB]
    | [sS][cC]
    | [sS][dD]
    | [sS][eE]
    | [sS][gG]
    | [sS][hH]
    | [sS][iI]
    | [sS][jJ]
    | [sS][kK]
    | [sS][lL]
    | [sS][mM]
    | [sS][nN]
    | [sS][oO]
    | [sS][rR]
    | [sS][tT]
    | [sS][uU]
    | [sS][vV]
    | [sS][yY]
    | [sS][zZ]
    | [tT][cC]
    | [tT][dD]
    | [tT][eE][lL]
    | [tT][fF]
    | [tT][gG]
    | [tT][hH]
    | [tT][jJ]
    | [tT][kK]
    | [tT][lL]
    | [tT][mM]
    | [tT][nN]
    | [tT][oO]
    | [tT][pP]
    | [tT][rR]
    | [tT][rR][aA][vV][eE][lL]
    | [tT][tT]
    | [tT][vV]
    | [tT][wW]
    | [tT][zZ]
    | [uU][aA]
    | [uU][gG]
    | [uU][kK]
    | [uU][sS]
    | [uU][yY]
    | [uU][zZ]
    | [vV][aA]
    | [vV][cC]
    | [vV][eE]
    | [vV][gG]
    | [vV][iI]
    | [vV][nN]
    | [vV][uU]
    | [wW][fF]
    | [wW][sS]
    | [xX][nN]--0[zZ][wW][mM]56[dD]
    | [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
    | [xX][nN]--3[eE]0[bB]707[eE]
    | [xX][nN]--45[bB][rR][jJ]9[cC]
    | [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
    | [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
    | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
    | [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
    | [xX][nN]--[fF][iI][qQ][sS]8[sS]
    | [xX][nN]--[fF][iI][qQ][zZ]9[sS]
    | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
    | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
    | [xX][nN]--[gG]6[wW]251[dD]
    | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
    | [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
    | [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
    | [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
    | [xX][nN]--[jJ]6[wW]193[gG]
    | [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
    | [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
    | [xX][nN]--[kK][pP][rR][wW]13[dD]
    | [xX][nN]--[kK][pP][rR][yY]57[dD]
    | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
    | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
    | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
    | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
    | [xX][nN]--[oO]3[cC][wW]4[hH]
    | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
    | [xX][nN]--[pP]1[aA][iI]
    | [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
    | [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
    | [xX][nN]--[wW][gG][bB][hH]1[cC]
    | [xX][nN]--[wW][gG][bB][lL]6[aA]
    | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
    | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
    | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
    | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
    | [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
    | [yY][eE]
    | [yY][tT]
    | [zZ][aA]
    | [zZ][mM]
    | [zZ][wW]
    ) "."?   // Accept trailing root (empty) domain
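A note on the notation above: each alternative spells out both cases per character ([mM][oO][bB][iI] and so on), so the TLD match is case-insensitive without any scanner-wide case-folding option. A rough, illustrative Java equivalent of the check this macro performs (TldCheck, KNOWN_TLDS, and hasKnownTld are made-up names for the sketch, not Lucene API):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Locale;
    import java.util.Set;

    public class TldCheck {
      // Illustrative subset of the alternatives listed above.
      static final Set<String> KNOWN_TLDS =
          new HashSet<String>(Arrays.asList("mobi", "museum", "name", "net", "org", "travel"));

      // Case-insensitive test of the final label, mirroring the [mM][oO][bB][iI] classes.
      static boolean hasKnownTld(String host) {
        if (host.endsWith(".")) host = host.substring(0, host.length() - 1); // trailing root label
        int dot = host.lastIndexOf('.');
        return dot >= 0 && KNOWN_TLDS.contains(host.substring(dot + 1).toLowerCase(Locale.ROOT));
      }

      public static void main(String[] args) {
        System.out.println(hasKnownTld("lucene.apache.ORG")); // true
        System.out.println(hasKnownTld("localhost"));         // false
      }
    }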
@@ -1,125 +0,0 @@
/*
 * Copyright 2010 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros


ALetterSupp = (
      ([\ud80d][\uDC00-\uDC2E])
    | ([\ud80c][\uDC00-\uDFFF])
    | ([\ud809][\uDC00-\uDC62])
    | ([\ud808][\uDC00-\uDF6E])
    | ([\ud81a][\uDC00-\uDE38])
    | ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
    | ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
    | ([\ud801][\uDC00-\uDC9D])
    | ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
    | ([\ud803][\uDC00-\uDC48])
    | ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
)
FormatSupp = (
      ([\ud804][\uDCBD])
    | ([\ud834][\uDD73-\uDD7A])
    | ([\udb40][\uDC01\uDC20-\uDC7F])
)
ExtendSupp = (
      ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
    | ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
    | ([\ud800][\uDDFD])
    | ([\udb40][\uDD00-\uDDEF])
    | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
)
NumericSupp = (
      ([\ud804][\uDC66-\uDC6F])
    | ([\ud835][\uDFCE-\uDFFF])
    | ([\ud801][\uDCA0-\uDCA9])
)
KatakanaSupp = (
      ([\ud82c][\uDC00])
)
MidLetterSupp = (
      []
)
MidNumSupp = (
      []
)
MidNumLetSupp = (
      []
)
ExtendNumLetSupp = (
      []
)
ExtendNumLetSupp = (
      []
)
ComplexContextSupp = (
      []
)
HanSupp = (
      ([\ud87e][\uDC00-\uDE1D])
    | ([\ud86b][\uDC00-\uDFFF])
    | ([\ud86a][\uDC00-\uDFFF])
    | ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
    | ([\ud868][\uDC00-\uDFFF])
    | ([\ud86e][\uDC00-\uDC1D])
    | ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
    | ([\ud86c][\uDC00-\uDFFF])
    | ([\ud863][\uDC00-\uDFFF])
    | ([\ud862][\uDC00-\uDFFF])
    | ([\ud861][\uDC00-\uDFFF])
    | ([\ud860][\uDC00-\uDFFF])
    | ([\ud867][\uDC00-\uDFFF])
    | ([\ud866][\uDC00-\uDFFF])
    | ([\ud865][\uDC00-\uDFFF])
    | ([\ud864][\uDC00-\uDFFF])
    | ([\ud858][\uDC00-\uDFFF])
    | ([\ud859][\uDC00-\uDFFF])
    | ([\ud85a][\uDC00-\uDFFF])
    | ([\ud85b][\uDC00-\uDFFF])
    | ([\ud85c][\uDC00-\uDFFF])
    | ([\ud85d][\uDC00-\uDFFF])
    | ([\ud85e][\uDC00-\uDFFF])
    | ([\ud85f][\uDC00-\uDFFF])
    | ([\ud850][\uDC00-\uDFFF])
    | ([\ud851][\uDC00-\uDFFF])
    | ([\ud852][\uDC00-\uDFFF])
    | ([\ud853][\uDC00-\uDFFF])
    | ([\ud854][\uDC00-\uDFFF])
    | ([\ud855][\uDC00-\uDFFF])
    | ([\ud856][\uDC00-\uDFFF])
    | ([\ud857][\uDC00-\uDFFF])
    | ([\ud849][\uDC00-\uDFFF])
    | ([\ud848][\uDC00-\uDFFF])
    | ([\ud84b][\uDC00-\uDFFF])
    | ([\ud84a][\uDC00-\uDFFF])
    | ([\ud84d][\uDC00-\uDFFF])
    | ([\ud84c][\uDC00-\uDFFF])
    | ([\ud84f][\uDC00-\uDFFF])
    | ([\ud84e][\uDC00-\uDFFF])
    | ([\ud841][\uDC00-\uDFFF])
    | ([\ud840][\uDC00-\uDFFF])
    | ([\ud843][\uDC00-\uDFFF])
    | ([\ud842][\uDC00-\uDFFF])
    | ([\ud845][\uDC00-\uDFFF])
    | ([\ud844][\uDC00-\uDFFF])
    | ([\ud847][\uDC00-\uDFFF])
    | ([\ud846][\uDC00-\uDFFF])
)
HiraganaSupp = (
      ([\ud83c][\uDE00])
    | ([\ud82c][\uDC01])
)
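A note on the macros above: JFlex at this point matched 16-bit code units, so each supplementary range is written as a high surrogate followed by a class of low surrogates. The correspondence is easy to verify with plain java.lang.Character (a small self-contained sketch):

    public class SurrogateRanges {
      public static void main(String[] args) {
        // ([\ud80d][\uDC00-\uDC2E]) above encodes one contiguous code point range.
        int lo = Character.toCodePoint('\ud80d', '\udc00');
        int hi = Character.toCodePoint('\ud80d', '\udc2e');
        System.out.printf("U+%04X..U+%04X%n", lo, hi); // U+13400..U+1342E

        // And the direction the generator works in: code point -> surrogate pair.
        char[] pair = Character.toChars(0x13400);
        System.out.printf("\\u%04X \\u%04X%n", (int) pair[0], (int) pair[1]); // D80D DC00
      }
    }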
File diff suppressed because it is too large
@@ -1,184 +0,0 @@
package org.apache.lucene.analysis.standard.std31;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * This class implements StandardTokenizer, except with a bug
 * (https://issues.apache.org/jira/browse/LUCENE-3358) where Han and Hiragana
 * characters would be split from combining characters:
 * @deprecated This class is only for exact backwards compatibility
 */
@Deprecated
%%

%unicode 6.0
%integer
%final
%public
%class StandardTokenizerImpl31
%implements StandardTokenizerInterface
%function getNextToken
%char

%include src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
Extend = ([\p{WB:Extend}] | {ExtendSupp})
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
Han = ([\p{Script:Han}] | {HanSupp})
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})

// Script=Hangul & Aletter
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X
//
ALetterEx = {ALetter} ({Format} | {Extend})*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
KatakanaEx = {Katakana} ({Format} | {Extend})*
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*


%{
  /** Alphanumeric sequences */
  public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;

  /** Numbers */
  public static final int NUMERIC_TYPE = StandardTokenizer.NUM;

  /**
   * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
   * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept
   * together as a single token rather than broken up, because the logic
   * required to break them at word boundaries is too complex for UAX#29.
   * <p>
   * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
   */
  public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;

  public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;

  public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;

  public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;

  public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;

  public final int yychar()
  {
    return yychar;
  }

  /**
   * Fills CharTermAttribute with the current token text.
   */
  public final void getText(CharTermAttribute t) {
    t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
  }
%}

%%

// UAX#29 WB1.   sot   ÷
//        WB2.     ÷   eot
//
<<EOF>> { return StandardTokenizerInterface.YYEOF; }

// UAX#29 WB8.   Numeric × Numeric
//        WB11.  Numeric (MidNum | MidNumLet) × Numeric
//        WB12.  Numeric × (MidNum | MidNumLet) Numeric
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
                              | {MidNumericEx} {NumericEx}
                              | {NumericEx})*
{ExtendNumLetEx}*
  { return NUMERIC_TYPE; }

// subset of the below for typing purposes only!
{HangulEx}+
  { return HANGUL_TYPE; }

{KatakanaEx}+
  { return KATAKANA_TYPE; }

// UAX#29 WB5.   ALetter × ALetter
//        WB6.   ALetter × (MidLetter | MidNumLet) ALetter
//        WB7.   ALetter (MidLetter | MidNumLet) × ALetter
//        WB9.   ALetter × Numeric
//        WB10.  Numeric × ALetter
//        WB13.  Katakana × Katakana
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}*  ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
  { return WORD_TYPE; }


// From UAX #29:
//
//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
//    boundary property values based on criteria outside of the scope of this
//    annex.  That means that satisfactory treatment of languages like Chinese
//    or Thai requires special handling.
//
// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together.  This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
//    http://www.unicode.org/reports/tr14/#SA
//
{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }

// UAX#29 WB14.  Any ÷ Any
//
{Han} { return IDEOGRAPHIC_TYPE; }
{Hiragana} { return HIRAGANA_TYPE; }


// UAX#29 WB3.   CR × LF
//        WB3a.  (Newline | CR | LF) ÷
//        WB3b.  ÷ (Newline | CR | LF)
//        WB14.  Any ÷ Any
//
[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
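The %function getNextToken, %char, and the getText/yychar helpers in the %{ ... %} block make up the surface that the tokenizer classes drive. A sketch of the consumption loop (this illustrates the calling pattern only, not the actual StandardTokenizer code; the scanner instance would come from the JFlex-generated class):

    import java.io.IOException;
    import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class ScannerLoopSketch {
      // Pulls tokens until the generated scanner signals end-of-input.
      static void drain(StandardTokenizerInterface scanner, CharTermAttribute termAtt)
          throws IOException {
        int tokenType;
        while ((tokenType = scanner.getNextToken()) != StandardTokenizerInterface.YYEOF) {
          scanner.getText(termAtt);           // copies the matched text into the attribute
          int startOffset = scanner.yychar(); // %char makes character offsets available
          // ... a real tokenizer would now set offsets and type, then emit the token
        }
      }
    }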
File diff suppressed because it is too large
@@ -1,269 +0,0 @@
package org.apache.lucene.analysis.standard.std31;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * This class implements UAX29URLEmailTokenizer, except with a bug
 * (https://issues.apache.org/jira/browse/LUCENE-3358) where Han and Hiragana
 * characters would be split from combining characters:
 * @deprecated This class is only for exact backwards compatibility
 */
@Deprecated
%%

%unicode 6.0
%integer
%final
%public
%class UAX29URLEmailTokenizerImpl31
%implements StandardTokenizerInterface
%function getNextToken
%char

%include src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
Extend = ([\p{WB:Extend}] | {ExtendSupp})
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
Han = ([\p{Script:Han}] | {HanSupp})
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})

// Script=Hangul & Aletter
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X
//
ALetterEx = {ALetter} ({Format} | {Extend})*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
KatakanaEx = {Katakana} ({Format} | {Extend})*
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*


// URL and E-mail syntax specifications:
//
//     RFC-952:  DOD INTERNET HOST TABLE SPECIFICATION
//     RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
//     RFC-1123: Requirements for Internet Hosts - Application and Support
//     RFC-1738: Uniform Resource Locators (URL)
//     RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
//     RFC-5234: Augmented BNF for Syntax Specifications: ABNF
//     RFC-5321: Simple Mail Transfer Protocol
//     RFC-5322: Internet Message Format

%include src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro

DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
DomainNameLoose  = {DomainLabel} ("." {DomainLabel})*

IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
IPv4Address  = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
            | "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
            | {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
            | (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"

URIunreserved = [-._~A-Za-z0-9]
URIpercentEncoded = "%" [0-9A-Fa-f]{2}
URIsubDelims = [!$&'()*+,;=]
URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
URIquery    = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIport = ":" [0-9]{1,5}
URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
URIhostLoose  = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}

URIauthorityStrict =             {URIhostStrict} {URIport}?
URIauthorityLoose  = {URIlogin}? {URIhostLoose}  {URIport}?

HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
HTTPpath = ("/" {HTTPsegment})*
HTTPscheme = [hH][tT][tT][pP][sS]? "://"
HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}

FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
FTPscheme = [fF][tT][pP] "://"
FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?

FILEscheme = [fF][iI][lL][eE] "://"
FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?

URL = {HTTPurl} | {FTPurl} | {FILEurl}

EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
// in the {EMAILbracketedHost} definition without incurring any size penalties,
// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
// The IP address regexes are included in {EMAILbracketedHost} simply as a
// reminder that they are acceptable bracketed host forms.
EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})


%{
  /** Alphanumeric sequences */
  public static final int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;

  /** Numbers */
  public static final int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;

  /**
   * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
   * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept
   * together as a single token rather than broken up, because the logic
   * required to break them at word boundaries is too complex for UAX#29.
   * <p>
   * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
   */
  public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;

  public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;

  public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;

  public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;

  public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;

  public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;

  public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;

  public final int yychar()
  {
    return yychar;
  }

  /**
   * Fills CharTermAttribute with the current token text.
   */
  public final void getText(CharTermAttribute t) {
    t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
  }
%}

%%

// UAX#29 WB1.   sot   ÷
//        WB2.     ÷   eot
//
<<EOF>> { return StandardTokenizerInterface.YYEOF; }

{URL}   { return URL_TYPE; }
{EMAIL} { return EMAIL_TYPE; }

// UAX#29 WB8.   Numeric × Numeric
//        WB11.  Numeric (MidNum | MidNumLet) × Numeric
//        WB12.  Numeric × (MidNum | MidNumLet) Numeric
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
                              | {MidNumericEx} {NumericEx}
                              | {NumericEx})*
{ExtendNumLetEx}*
  { return NUMERIC_TYPE; }

// subset of the below for typing purposes only!
{HangulEx}+
  { return HANGUL_TYPE; }

{KatakanaEx}+
  { return KATAKANA_TYPE; }

// UAX#29 WB5.   ALetter × ALetter
//        WB6.   ALetter × (MidLetter | MidNumLet) ALetter
//        WB7.   ALetter (MidLetter | MidNumLet) × ALetter
//        WB9.   ALetter × Numeric
//        WB10.  Numeric × ALetter
//        WB13.  Katakana × Katakana
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}*  ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
  { return WORD_TYPE; }


// From UAX #29:
//
//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
//    boundary property values based on criteria outside of the scope of this
//    annex.  That means that satisfactory treatment of languages like Chinese
//    or Thai requires special handling.
//
// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together.  This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
//    http://www.unicode.org/reports/tr14/#SA
//
{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }

// UAX#29 WB14.  Any ÷ Any
//
{Han} { return IDEOGRAPHIC_TYPE; }
{Hiragana} { return HIRAGANA_TYPE; }


// UAX#29 WB3.   CR × LF
//        WB3a.  (Newline | CR | LF) ÷
//        WB3b.  ÷ (Newline | CR | LF)
//        WB14.  Any ÷ Any
//
[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
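The {IPv4DecimalOctet} macro in the grammar above is a plain enumeration of 0-255 that tolerates up to two leading zeros. A hedged translation into java.util.regex, with a couple of spot checks:

    import java.util.regex.Pattern;

    public class OctetCheck {
      // Same alternatives as the JFlex macro: 0-9 (with 0-2 leading zeros),
      // 10-99 (optional leading zero), 100-199, 200-249, 250-255.
      static final Pattern OCTET = Pattern.compile(
          "0{0,2}[0-9]|0?[1-9][0-9]|1[0-9][0-9]|2(?:[0-4][0-9]|5[0-5])");

      public static void main(String[] args) {
        for (String s : new String[] {"0", "007", "255", "256", "999"}) {
          System.out.println(s + " -> " + OCTET.matcher(s).matches());
        }
        // 0, 007, and 255 match; 256 and 999 do not.
      }
    }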
@@ -1,22 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Backwards-compatible implementation to match {@link org.apache.lucene.util.Version#LUCENE_31}
</body>
</html>
@@ -1,334 +0,0 @@
/*
 * Copyright 2001-2005 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
// file version from Thursday, August 4, 2011 11:34:20 AM UTC
// generated on Thursday, August 4, 2011 11:46:19 PM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros

ASCIITLD = "." (
      [aA][cC]
    | [aA][dD]
    | [aA][eE]
    | [aA][eE][rR][oO]
    | [aA][fF]
    | [aA][gG]
    | [aA][iI]
    | [aA][lL]
    | [aA][mM]
    | [aA][nN]
    | [aA][oO]
    | [aA][qQ]
    | [aA][rR]
    | [aA][rR][pP][aA]
    | [aA][sS]
    | [aA][sS][iI][aA]
    | [aA][tT]
    | [aA][uU]
    | [aA][wW]
    | [aA][xX]
    | [aA][zZ]
    | [bB][aA]
    | [bB][bB]
    | [bB][dD]
    | [bB][eE]
    | [bB][fF]
    | [bB][gG]
    | [bB][hH]
    | [bB][iI]
    | [bB][iI][zZ]
    | [bB][jJ]
    | [bB][mM]
    | [bB][nN]
    | [bB][oO]
    | [bB][rR]
    | [bB][sS]
    | [bB][tT]
    | [bB][vV]
    | [bB][wW]
    | [bB][yY]
    | [bB][zZ]
    | [cC][aA]
    | [cC][aA][tT]
    | [cC][cC]
    | [cC][dD]
    | [cC][fF]
    | [cC][gG]
    | [cC][hH]
    | [cC][iI]
    | [cC][kK]
    | [cC][lL]
    | [cC][mM]
    | [cC][nN]
    | [cC][oO]
    | [cC][oO][mM]
    | [cC][oO][oO][pP]
    | [cC][rR]
    | [cC][uU]
    | [cC][vV]
    | [cC][xX]
    | [cC][yY]
    | [cC][zZ]
    | [dD][eE]
    | [dD][jJ]
    | [dD][kK]
    | [dD][mM]
    | [dD][oO]
    | [dD][zZ]
    | [eE][cC]
    | [eE][dD][uU]
    | [eE][eE]
    | [eE][gG]
    | [eE][rR]
    | [eE][sS]
    | [eE][tT]
    | [eE][uU]
    | [fF][iI]
    | [fF][jJ]
    | [fF][kK]
    | [fF][mM]
    | [fF][oO]
    | [fF][rR]
    | [gG][aA]
    | [gG][bB]
    | [gG][dD]
    | [gG][eE]
    | [gG][fF]
    | [gG][gG]
    | [gG][hH]
    | [gG][iI]
    | [gG][lL]
    | [gG][mM]
    | [gG][nN]
    | [gG][oO][vV]
    | [gG][pP]
    | [gG][qQ]
    | [gG][rR]
    | [gG][sS]
    | [gG][tT]
    | [gG][uU]
    | [gG][wW]
    | [gG][yY]
    | [hH][kK]
    | [hH][mM]
    | [hH][nN]
    | [hH][rR]
    | [hH][tT]
    | [hH][uU]
    | [iI][dD]
    | [iI][eE]
    | [iI][lL]
    | [iI][mM]
    | [iI][nN]
    | [iI][nN][fF][oO]
    | [iI][nN][tT]
    | [iI][oO]
    | [iI][qQ]
    | [iI][rR]
    | [iI][sS]
    | [iI][tT]
    | [jJ][eE]
    | [jJ][mM]
    | [jJ][oO]
    | [jJ][oO][bB][sS]
    | [jJ][pP]
    | [kK][eE]
    | [kK][gG]
    | [kK][hH]
    | [kK][iI]
    | [kK][mM]
    | [kK][nN]
    | [kK][pP]
    | [kK][rR]
    | [kK][wW]
    | [kK][yY]
    | [kK][zZ]
    | [lL][aA]
    | [lL][bB]
    | [lL][cC]
    | [lL][iI]
    | [lL][kK]
    | [lL][rR]
    | [lL][sS]
    | [lL][tT]
    | [lL][uU]
    | [lL][vV]
    | [lL][yY]
    | [mM][aA]
    | [mM][cC]
    | [mM][dD]
    | [mM][eE]
    | [mM][gG]
    | [mM][hH]
    | [mM][iI][lL]
    | [mM][kK]
    | [mM][lL]
    | [mM][mM]
    | [mM][nN]
    | [mM][oO]
    | [mM][oO][bB][iI]
    | [mM][pP]
    | [mM][qQ]
    | [mM][rR]
    | [mM][sS]
    | [mM][tT]
    | [mM][uU]
    | [mM][uU][sS][eE][uU][mM]
    | [mM][vV]
    | [mM][wW]
    | [mM][xX]
    | [mM][yY]
    | [mM][zZ]
    | [nN][aA]
    | [nN][aA][mM][eE]
    | [nN][cC]
    | [nN][eE]
    | [nN][eE][tT]
    | [nN][fF]
    | [nN][gG]
    | [nN][iI]
    | [nN][lL]
    | [nN][oO]
    | [nN][pP]
    | [nN][rR]
    | [nN][uU]
    | [nN][zZ]
    | [oO][mM]
    | [oO][rR][gG]
    | [pP][aA]
    | [pP][eE]
    | [pP][fF]
    | [pP][gG]
    | [pP][hH]
    | [pP][kK]
    | [pP][lL]
    | [pP][mM]
    | [pP][nN]
    | [pP][rR]
    | [pP][rR][oO]
    | [pP][sS]
    | [pP][tT]
    | [pP][wW]
    | [pP][yY]
    | [qQ][aA]
    | [rR][eE]
    | [rR][oO]
    | [rR][sS]
    | [rR][uU]
    | [rR][wW]
    | [sS][aA]
    | [sS][bB]
    | [sS][cC]
    | [sS][dD]
    | [sS][eE]
    | [sS][gG]
    | [sS][hH]
    | [sS][iI]
    | [sS][jJ]
    | [sS][kK]
    | [sS][lL]
    | [sS][mM]
    | [sS][nN]
    | [sS][oO]
    | [sS][rR]
    | [sS][tT]
    | [sS][uU]
    | [sS][vV]
    | [sS][yY]
    | [sS][zZ]
    | [tT][cC]
    | [tT][dD]
    | [tT][eE][lL]
    | [tT][fF]
    | [tT][gG]
    | [tT][hH]
    | [tT][jJ]
    | [tT][kK]
    | [tT][lL]
    | [tT][mM]
    | [tT][nN]
    | [tT][oO]
    | [tT][pP]
    | [tT][rR]
    | [tT][rR][aA][vV][eE][lL]
    | [tT][tT]
    | [tT][vV]
    | [tT][wW]
    | [tT][zZ]
    | [uU][aA]
    | [uU][gG]
    | [uU][kK]
    | [uU][sS]
    | [uU][yY]
    | [uU][zZ]
    | [vV][aA]
    | [vV][cC]
    | [vV][eE]
    | [vV][gG]
    | [vV][iI]
    | [vV][nN]
    | [vV][uU]
    | [wW][fF]
    | [wW][sS]
    | [xX][nN]--0[zZ][wW][mM]56[dD]
    | [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
    | [xX][nN]--3[eE]0[bB]707[eE]
    | [xX][nN]--45[bB][rR][jJ]9[cC]
    | [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
    | [xX][nN]--90[aA]3[aA][cC]
    | [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
    | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
    | [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
    | [xX][nN]--[fF][iI][qQ][sS]8[sS]
    | [xX][nN]--[fF][iI][qQ][zZ]9[sS]
    | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
    | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
    | [xX][nN]--[gG]6[wW]251[dD]
    | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
    | [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
    | [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
    | [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
    | [xX][nN]--[jJ]6[wW]193[gG]
    | [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
    | [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
    | [xX][nN]--[kK][pP][rR][wW]13[dD]
    | [xX][nN]--[kK][pP][rR][yY]57[dD]
    | [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
    | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
    | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
    | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
    | [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
    | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
    | [xX][nN]--[oO]3[cC][wW]4[hH]
    | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
    | [xX][nN]--[pP]1[aA][iI]
    | [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
    | [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
    | [xX][nN]--[wW][gG][bB][hH]1[cC]
    | [xX][nN]--[wW][gG][bB][lL]6[aA]
    | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
    | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
    | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
    | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
    | [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
    | [xX][xX][xX]
    | [yY][eE]
    | [yY][tT]
    | [zZ][aA]
    | [zZ][mM]
    | [zZ][wW]
    ) "."?   // Accept trailing root (empty) domain
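The closing `"."?` accepts a fully-qualified name with an explicit, empty root label (e.g. lucene.apache.org.). A simplified, illustrative shape of {DomainLabel} plus a TLD with that optional trailing dot (the three-TLD subset here is made up for the example, not the full macro):

    import java.util.regex.Pattern;

    public class RootDomainDemo {
      // Labels as in {DomainLabel}, a known TLD, then an optional trailing dot.
      static final Pattern STRICT = Pattern.compile(
          "(?:[A-Za-z0-9](?:[-A-Za-z0-9]*[A-Za-z0-9])?\\.)+(?i:org|com|net)\\.?");

      public static void main(String[] args) {
        System.out.println(STRICT.matcher("lucene.apache.org").matches());  // true
        System.out.println(STRICT.matcher("lucene.apache.org.").matches()); // true
        System.out.println(STRICT.matcher("lucene.apache.").matches());     // false
      }
    }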
@@ -1,125 +0,0 @@
/*
 * Copyright 2010 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros


ALetterSupp = (
      ([\ud80d][\uDC00-\uDC2E])
    | ([\ud80c][\uDC00-\uDFFF])
    | ([\ud809][\uDC00-\uDC62])
    | ([\ud808][\uDC00-\uDF6E])
    | ([\ud81a][\uDC00-\uDE38])
    | ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
    | ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
    | ([\ud801][\uDC00-\uDC9D])
    | ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
    | ([\ud803][\uDC00-\uDC48])
    | ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
)
FormatSupp = (
      ([\ud804][\uDCBD])
    | ([\ud834][\uDD73-\uDD7A])
    | ([\udb40][\uDC01\uDC20-\uDC7F])
)
ExtendSupp = (
      ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
    | ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
    | ([\ud800][\uDDFD])
    | ([\udb40][\uDD00-\uDDEF])
    | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
)
NumericSupp = (
      ([\ud804][\uDC66-\uDC6F])
    | ([\ud835][\uDFCE-\uDFFF])
    | ([\ud801][\uDCA0-\uDCA9])
)
KatakanaSupp = (
      ([\ud82c][\uDC00])
)
MidLetterSupp = (
      []
)
MidNumSupp = (
      []
)
MidNumLetSupp = (
      []
)
ExtendNumLetSupp = (
      []
)
ExtendNumLetSupp = (
      []
)
ComplexContextSupp = (
      []
)
HanSupp = (
      ([\ud87e][\uDC00-\uDE1D])
    | ([\ud86b][\uDC00-\uDFFF])
    | ([\ud86a][\uDC00-\uDFFF])
    | ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
    | ([\ud868][\uDC00-\uDFFF])
    | ([\ud86e][\uDC00-\uDC1D])
    | ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
    | ([\ud86c][\uDC00-\uDFFF])
    | ([\ud863][\uDC00-\uDFFF])
    | ([\ud862][\uDC00-\uDFFF])
    | ([\ud861][\uDC00-\uDFFF])
    | ([\ud860][\uDC00-\uDFFF])
    | ([\ud867][\uDC00-\uDFFF])
    | ([\ud866][\uDC00-\uDFFF])
    | ([\ud865][\uDC00-\uDFFF])
    | ([\ud864][\uDC00-\uDFFF])
    | ([\ud858][\uDC00-\uDFFF])
    | ([\ud859][\uDC00-\uDFFF])
    | ([\ud85a][\uDC00-\uDFFF])
    | ([\ud85b][\uDC00-\uDFFF])
    | ([\ud85c][\uDC00-\uDFFF])
    | ([\ud85d][\uDC00-\uDFFF])
    | ([\ud85e][\uDC00-\uDFFF])
    | ([\ud85f][\uDC00-\uDFFF])
    | ([\ud850][\uDC00-\uDFFF])
    | ([\ud851][\uDC00-\uDFFF])
    | ([\ud852][\uDC00-\uDFFF])
    | ([\ud853][\uDC00-\uDFFF])
    | ([\ud854][\uDC00-\uDFFF])
    | ([\ud855][\uDC00-\uDFFF])
    | ([\ud856][\uDC00-\uDFFF])
    | ([\ud857][\uDC00-\uDFFF])
    | ([\ud849][\uDC00-\uDFFF])
    | ([\ud848][\uDC00-\uDFFF])
    | ([\ud84b][\uDC00-\uDFFF])
    | ([\ud84a][\uDC00-\uDFFF])
    | ([\ud84d][\uDC00-\uDFFF])
    | ([\ud84c][\uDC00-\uDFFF])
    | ([\ud84f][\uDC00-\uDFFF])
    | ([\ud84e][\uDC00-\uDFFF])
    | ([\ud841][\uDC00-\uDFFF])
    | ([\ud840][\uDC00-\uDFFF])
    | ([\ud843][\uDC00-\uDFFF])
    | ([\ud842][\uDC00-\uDFFF])
    | ([\ud845][\uDC00-\uDFFF])
    | ([\ud844][\uDC00-\uDFFF])
    | ([\ud847][\uDC00-\uDFFF])
    | ([\ud846][\uDC00-\uDFFF])
)
HiraganaSupp = (
      ([\ud83c][\uDE00])
    | ([\ud82c][\uDC01])
)
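Most of the HanSupp alternatives span an entire low-surrogate block; each high surrogate covers exactly 1024 supplementary code points. A quick check of what ([\ud840][\uDC00-\uDFFF]) covers, using only the standard library:

    public class HanBlocks {
      public static void main(String[] args) {
        int lo = Character.toCodePoint('\ud840', '\udc00');
        int hi = Character.toCodePoint('\ud840', '\udfff');
        // Prints U+20000..U+203FF (1024 code points), the start of CJK Extension B.
        System.out.printf("U+%04X..U+%04X (%d code points)%n", lo, hi, hi - lo + 1);
      }
    }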
File diff suppressed because it is too large
@@ -1,272 +0,0 @@
package org.apache.lucene.analysis.standard.std34;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * This class implements UAX29URLEmailTokenizer, except with a bug
 * (https://issues.apache.org/jira/browse/LUCENE-3880) where "mailto:"
 * URI scheme prepended to an email address will disrupt recognition
 * of the email address.
 * @deprecated This class is only for exact backwards compatibility
 */
@Deprecated
%%

%unicode 6.0
%integer
%final
%public
%class UAX29URLEmailTokenizerImpl34
%implements StandardTokenizerInterface
%function getNextToken
%char

%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
Extend = ([\p{WB:Extend}] | {ExtendSupp})
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
Han = ([\p{Script:Han}] | {HanSupp})
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})

// Script=Hangul & Aletter
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X
//
ALetterEx = {ALetter} ({Format} | {Extend})*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
KatakanaEx = {Katakana} ({Format} | {Extend})*
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*

HanEx = {Han} ({Format} | {Extend})*
HiraganaEx = {Hiragana} ({Format} | {Extend})*

// URL and E-mail syntax specifications:
//
//     RFC-952:  DOD INTERNET HOST TABLE SPECIFICATION
//     RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
//     RFC-1123: Requirements for Internet Hosts - Application and Support
//     RFC-1738: Uniform Resource Locators (URL)
//     RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
//     RFC-5234: Augmented BNF for Syntax Specifications: ABNF
//     RFC-5321: Simple Mail Transfer Protocol
//     RFC-5322: Internet Message Format

%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro

DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
DomainNameLoose  = {DomainLabel} ("." {DomainLabel})*

IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
IPv4Address  = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
            | "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
            | {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
            | (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
            | (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"

URIunreserved = [-._~A-Za-z0-9]
URIpercentEncoded = "%" [0-9A-Fa-f]{2}
URIsubDelims = [!$&'()*+,;=]
URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
URIquery    = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIport = ":" [0-9]{1,5}
URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
URIhostLoose  = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}

URIauthorityStrict =             {URIhostStrict} {URIport}?
URIauthorityLoose  = {URIlogin}? {URIhostLoose}  {URIport}?

HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
HTTPpath = ("/" {HTTPsegment})*
HTTPscheme = [hH][tT][tT][pP][sS]? "://"
HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}

FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
FTPscheme = [fF][tT][pP] "://"
FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?

FILEscheme = [fF][iI][lL][eE] "://"
FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?

URL = {HTTPurl} | {FTPurl} | {FILEurl}

EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
// in the {EMAILbracketedHost} definition without incurring any size penalties,
// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
// The IP address regexes are included in {EMAILbracketedHost} simply as a
// reminder that they are acceptable bracketed host forms.
EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})


%{
  /** Alphanumeric sequences */
  public static final int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;

  /** Numbers */
  public static final int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;

  /**
   * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
   * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept
   * together as a single token rather than broken up, because the logic
   * required to break them at word boundaries is too complex for UAX#29.
   * <p>
   * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
   */
  public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;

  public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;

  public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;

  public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;

  public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;

  public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;

  public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;

  public final int yychar()
  {
    return yychar;
  }

  /**
   * Fills CharTermAttribute with the current token text.
   */
  public final void getText(CharTermAttribute t) {
    t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
  }
%}

%%

// UAX#29 WB1.   sot   ÷
//        WB2.     ÷   eot
//
<<EOF>> { return StandardTokenizerInterface.YYEOF; }

{URL}   { return URL_TYPE; }
{EMAIL} { return EMAIL_TYPE; }

// UAX#29 WB8.   Numeric × Numeric
//        WB11.  Numeric (MidNum | MidNumLet) × Numeric
//        WB12.  Numeric × (MidNum | MidNumLet) Numeric
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
                              | {MidNumericEx} {NumericEx}
                              | {NumericEx})*
{ExtendNumLetEx}*
  { return NUMERIC_TYPE; }

// subset of the below for typing purposes only!
{HangulEx}+
  { return HANGUL_TYPE; }

{KatakanaEx}+
  { return KATAKANA_TYPE; }

// UAX#29 WB5.   ALetter × ALetter
//        WB6.   ALetter × (MidLetter | MidNumLet) ALetter
//        WB7.   ALetter (MidLetter | MidNumLet) × ALetter
//        WB9.   ALetter × Numeric
//        WB10.  Numeric × ALetter
//        WB13.  Katakana × Katakana
//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}*  ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
  { return WORD_TYPE; }


// From UAX #29:
//
//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
//    boundary property values based on criteria outside of the scope of this
//    annex.  That means that satisfactory treatment of languages like Chinese
//    or Thai requires special handling.
//
// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together.  This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
//    http://www.unicode.org/reports/tr14/#SA
//
{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }

// UAX#29 WB14.  Any ÷ Any
//
{HanEx} { return IDEOGRAPHIC_TYPE; }
{HiraganaEx} { return HIRAGANA_TYPE; }


// UAX#29 WB3.   CR × LF
//        WB3a.  (Newline | CR | LF) ÷
//        WB3b.  ÷ (Newline | CR | LF)
//        WB14.  Any ÷ Any
//
[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
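One visible difference from the std31 grammars above: the ideographic rules here use {HanEx} and {HiraganaEx}, which append trailing {Format}/{Extend} characters. That is the LUCENE-3358 fix, so a hiragana base character plus a combining mark stays one token. What such input looks like, sketched with java.text.Normalizer (illustrative only):

    import java.text.Normalizer;

    public class CombiningDemo {
      public static void main(String[] args) {
        // NFD splits が (U+304C) into か (U+304B) plus the combining voiced mark (U+3099).
        String decomposed = Normalizer.normalize("\u304C", Normalizer.Form.NFD);
        for (int i = 0; i < decomposed.length(); i++) {
          System.out.printf("U+%04X ", (int) decomposed.charAt(i));
        }
        System.out.println(); // U+304B U+3099
        // Under the bare {Hiragana} rule the U+3099 was split off;
        // {HiraganaEx} keeps it attached to the base character.
      }
    }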
@@ -1,22 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Backwards-compatible implementation to match {@link org.apache.lucene.util.Version#LUCENE_34}
</body>
</html>
@@ -33,13 +33,6 @@ import org.apache.lucene.util.Version;

/**
 * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
- * <p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ThaiAnalyzer:
- * <ul>
- * <li> As of 3.6, a set of Thai stopwords is used by default
- * </ul>
 */
public final class ThaiAnalyzer extends StopwordAnalyzerBase {

@@ -84,7 +77,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
   * @param matchVersion lucene compatibility version
   */
  public ThaiAnalyzer(Version matchVersion) {
-    this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STOP_SET : StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
@@ -112,8 +105,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
-    if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new LowerCaseFilter(matchVersion, result);
+    result = new LowerCaseFilter(matchVersion, result);
    result = new ThaiWordFilter(matchVersion, result);
    return new TokenStreamComponents(source, new StopFilter(matchVersion,
        result, stopwords));
@ -23,7 +23,6 @@ import java.util.Locale;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
@ -34,10 +33,6 @@ import org.apache.lucene.util.Version;
/**
 * {@link TokenFilter} that uses {@link java.text.BreakIterator} to break each
 * Token that is Thai into separate Token(s) for each Thai word.
 * <p>Please note: Since matchVersion 3.1 on, this filter no longer lowercases non-Thai text.
 * {@link ThaiAnalyzer} will insert a {@link LowerCaseFilter} before this filter
 * so the behaviour of the Analyzer does not change. With version 3.1, the filter handles
 * position increments correctly.
 * <p>WARNING: this filter may not be supported by all JREs.
 * It is known to work with Sun/Oracle and Harmony JREs.
 * If your application needs to be fully portable, consider using ICUTokenizer instead,
@ -58,8 +53,6 @@ public final class ThaiWordFilter extends TokenFilter {
  private final BreakIterator breaker = (BreakIterator) proto.clone();
  private final CharArrayIterator charIterator = CharArrayIterator.newWordInstance();

  private final boolean handlePosIncr;

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
@ -72,11 +65,9 @@ public final class ThaiWordFilter extends TokenFilter {

  /** Creates a new ThaiWordFilter with the specified match version. */
  public ThaiWordFilter(Version matchVersion, TokenStream input) {
    super(matchVersion.onOrAfter(Version.LUCENE_31) ?
      input : new LowerCaseFilter(matchVersion, input));
    super(input);
    if (!DBBI_AVAILABLE)
      throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
    handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
  }

  @Override
@ -92,7 +83,7 @@ public final class ThaiWordFilter extends TokenFilter {
      } else {
        offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
      }
      if (handlePosIncr) posAtt.setPositionIncrement(1);
      posAtt.setPositionIncrement(1);
      return true;
    }
    hasMoreTokensInClone = false;
@ -30,40 +30,6 @@ import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;

/**
 * An abstract base class for simple, character-oriented tokenizers.
 * <p>
 * <a name="version">You must specify the required {@link Version} compatibility
 * when creating {@link CharTokenizer}:
 * <ul>
 * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
 * detect token codepoints. See {@link #isTokenChar(int)} and
 * {@link #normalize(int)} for details.</li>
 * </ul>
 * <p>
 * A new {@link CharTokenizer} API has been introduced with Lucene 3.1. This API
 * moved from UTF-16 code units to UTF-32 codepoints to eventually add support
 * for <a href=
 * "http://java.sun.com/j2se/1.5.0/docs/api/java/lang/Character.html#supplementary"
 * >supplementary characters</a>. The old <i>char</i> based API has been
 * deprecated and should be replaced with the <i>int</i> based methods
 * {@link #isTokenChar(int)} and {@link #normalize(int)}.
 * </p>
 * <p>
 * As of Lucene 3.1 each {@link CharTokenizer} constructor expects a
 * {@link Version} argument. Based on the given {@link Version} either the new
 * API or a backwards compatibility layer is used at runtime. For
 * {@link Version} < 3.1 the backwards compatibility layer ensures correct
 * behavior even for indexes built with previous versions of Lucene. If a
 * {@link Version} >= 3.1 is used {@link CharTokenizer} requires the new API to
 * be implemented by the instantiated class. Yet, the old <i>char</i> based API
 * is not required anymore even if backwards compatibility must be preserved.
 * {@link CharTokenizer} subclasses implementing the new API are fully backwards
 * compatible if instantiated with {@link Version} < 3.1.
 * </p>
 * <p>
 * <strong>Note:</strong> If you use a subclass of {@link CharTokenizer} with {@link Version} >=
 * 3.1 on an index built with a version < 3.1, created tokens might not be
 * compatible with the terms in your index.
 * </p>
 **/
public abstract class CharTokenizer extends Tokenizer {

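The int-based API described in the javadoc above is small. A minimal subclass sketch under the constructor signatures shown in the following hunks; the class name and the whitespace rule are my own choices, not part of this commit:

// SimpleWhitespaceTokenizer.java - illustrative subclass, not in the Lucene sources.
import java.io.Reader;

import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;

public final class SimpleWhitespaceTokenizer extends CharTokenizer {

  public SimpleWhitespaceTokenizer(Version matchVersion, Reader in) {
    super(matchVersion, in);
  }

  // Called once per code point rather than per UTF-16 unit, so a
  // supplementary character arrives here as a single int value.
  @Override
  protected boolean isTokenChar(int c) {
    return !Character.isWhitespace(c);
  }
}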
@ -71,7 +37,7 @@ public abstract class CharTokenizer extends Tokenizer {
   * Creates a new {@link CharTokenizer} instance
   *
   * @param matchVersion
   *          Lucene version to match See {@link <a href="#version">above</a>}
   *          Lucene version to match
   * @param input
   *          the input to split up into tokens
   */
@ -84,7 +50,7 @@ public abstract class CharTokenizer extends Tokenizer {
   * Creates a new {@link CharTokenizer} instance
   *
   * @param matchVersion
   *          Lucene version to match See {@link <a href="#version">above</a>}
   *          Lucene version to match
   * @param source
   *          the attribute source to use for this {@link Tokenizer}
   * @param input
@ -100,7 +66,7 @@ public abstract class CharTokenizer extends Tokenizer {
   * Creates a new {@link CharTokenizer} instance
   *
   * @param matchVersion
   *          Lucene version to match See {@link <a href="#version">above</a>}
   *          Lucene version to match
   * @param factory
   *          the attribute factory to use for this {@link Tokenizer}
   * @param input
@ -43,7 +43,12 @@ public abstract class CharacterUtils {
   * {@link Version} instance.
   */
  public static CharacterUtils getInstance(final Version matchVersion) {
    return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
    return JAVA_5;
  }

  /** explicitly returns a version matching java 4 semantics */
  public static CharacterUtils getJava4Instance() {
    return JAVA_4;
  }

  /**
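The practical difference between the two instances is how a surrogate pair is read. A sketch, assuming the codePointAt(CharSequence, int) method this class exposes; the demo class is hypothetical:

// CharacterUtilsDemo.java - illustrative only.
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;

public class CharacterUtilsDemo {
  public static void main(String[] args) {
    String supplementary = "\uD801\uDC1C";  // one code point, two UTF-16 units

    // getInstance now always returns full code-point (Java 5) semantics.
    CharacterUtils j5 = CharacterUtils.getInstance(Version.LUCENE_40);
    System.out.println(j5.codePointAt(supplementary, 0));  // 66588 (U+1041C)

    // Java 4 semantics remain reachable, but only by explicit request.
    CharacterUtils j4 = CharacterUtils.getJava4Instance();
    System.out.println(j4.codePointAt(supplementary, 0));  // 55297, just the lead surrogate
  }
}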
@ -98,7 +98,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer {
    Reader reader = null;
    try {
      reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
      return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_31, 16, ignoreCase));
      return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
    } finally {
      IOUtils.close(reader);
    }
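The same load pattern works outside the base class. A sketch using the calls visible in this hunk; the resource name and demo class are made up:

// StopwordLoadDemo.java - illustrative only.
import java.io.Reader;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;

public class StopwordLoadDemo {
  public static CharArraySet loadStopwords() throws Exception {
    Reader reader = null;
    try {
      // "my-stopwords.txt" is a hypothetical classpath resource, one word per line.
      reader = IOUtils.getDecodingReader(
          StopwordLoadDemo.class.getResourceAsStream("my-stopwords.txt"),
          IOUtils.CHARSET_UTF_8);
      // '#' marks comment lines; the resulting set ignores case.
      return WordlistLoader.getWordSet(reader, "#",
          new CharArraySet(Version.LUCENE_CURRENT, 16, true));
    } finally {
      IOUtils.close(reader);
    }
  }
}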
@ -20,7 +20,6 @@ package org.apache.lucene.collation;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
import org.apache.lucene.util.Version;

import java.text.Collator;
@ -28,12 +27,11 @@ import java.io.Reader;

/**
 * <p>
 *   Filters {@link KeywordTokenizer} with {@link CollationKeyFilter}.
 *   Configures {@link KeywordTokenizer} with {@link CollationAttributeFactory}.
 * </p>
 * <p>
 *   Converts the token into its {@link java.text.CollationKey}, and then
 *   encodes the CollationKey either directly or with
 *   {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow
 *   encodes the CollationKey directly to allow
 *   it to be stored as an index term.
 * </p>
 * <p>
@ -74,49 +72,24 @@ import java.io.Reader;
 *   CollationKeyAnalyzer to generate index terms, do not use
 *   ICUCollationKeyAnalyzer on the query side, or vice versa.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating CollationKeyAnalyzer:
 * <ul>
 *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
 *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
 * </ul>
 */
public final class CollationKeyAnalyzer extends Analyzer {
  private final Collator collator;
  private final CollationAttributeFactory factory;
  private final Version matchVersion;

  /**
   * Create a new CollationKeyAnalyzer, using the specified collator.
   *
   * @param matchVersion See <a href="#version">above</a>
   * @param matchVersion compatibility version
   * @param collator CollationKey generator
   */
  public CollationKeyAnalyzer(Version matchVersion, Collator collator) {
    this.matchVersion = matchVersion;
    this.collator = collator;
    this.factory = new CollationAttributeFactory(collator);
  }

  /**
   * @deprecated Use {@link CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)}
   *   and specify a version instead. This ctor will be removed in Lucene 5.0
   */
  @Deprecated
  public CollationKeyAnalyzer(Collator collator) {
    this(Version.LUCENE_31, collator);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      return new TokenStreamComponents(tokenizer, tokenizer);
    } else {
      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator));
    }
    KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
    return new TokenStreamComponents(tokenizer, tokenizer);
  }
}
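As a usage reference, a hedged sketch of the index/query symmetry the class javadoc warns about; the locale, strength, and demo class are illustrative:

// CollationDemo.java - illustrative only.
import java.text.Collator;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.util.Version;

public class CollationDemo {
  public static void main(String[] args) {
    // Keys are only comparable when produced by identically configured Collators,
    // so the index- and query-time analyzers must share this configuration.
    Collator collator = Collator.getInstance(new Locale("de", "DE"));
    collator.setStrength(Collator.PRIMARY);

    Analyzer indexAnalyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
    Analyzer queryAnalyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
    // Pass indexAnalyzer to IndexWriterConfig and queryAnalyzer to the query parser.
  }
}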
@ -1,108 +0,0 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;

import java.io.IOException;
import java.text.Collator;

/**
 * <p>
 *   Converts each token into its {@link java.text.CollationKey}, and then
 *   encodes the CollationKey with {@link IndexableBinaryStringTools}, to allow
 *   it to be stored as an index term.
 * </p>
 * <p>
 *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
 *   index and query time -- CollationKeys are only comparable when produced by
 *   the same Collator.  Since {@link java.text.RuleBasedCollator}s are not
 *   independently versioned, it is unsafe to search against stored
 *   CollationKeys unless the following are exactly the same (best practice is
 *   to store this information with the index and check that they remain the
 *   same at query time):
 * </p>
 * <ol>
 *   <li>JVM vendor</li>
 *   <li>JVM version, including patch version</li>
 *   <li>
 *     The language (and country and variant, if specified) of the Locale
 *     used when constructing the collator via
 *     {@link Collator#getInstance(java.util.Locale)}.
 *   </li>
 *   <li>
 *     The collation strength used - see {@link Collator#setStrength(int)}
 *   </li>
 * </ol>
 * <p>
 *   The <code>ICUCollationKeyFilter</code> in the analysis-icu package
 *   uses ICU4J's Collator, which makes its
 *   version available, thus allowing collation to be versioned independently
 *   from the JVM.  ICUCollationKeyFilter is also significantly faster and
 *   generates significantly shorter keys than CollationKeyFilter.  See
 *   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
 *   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
 *   generation timing and key length comparisons between ICU4J and
 *   java.text.Collator over several languages.
 * </p>
 * <p>
 *   CollationKeys generated by java.text.Collators are not compatible
 *   with those generated by ICU Collators.  Specifically, if you use
 *   CollationKeyFilter to generate index terms, do not use
 *   ICUCollationKeyFilter on the query side, or vice versa.
 * </p>
 * @deprecated Use {@link CollationAttributeFactory} instead, which encodes
 *   terms directly as bytes. This filter will be removed in Lucene 5.0
 */
@Deprecated
public final class CollationKeyFilter extends TokenFilter {
  private final Collator collator;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  /**
   * @param input Source token stream
   * @param collator CollationKey generator
   */
  public CollationKeyFilter(TokenStream input, Collator collator) {
    super(input);
    // clone in case JRE doesn't properly sync,
    // or to reduce contention in case they do
    this.collator = (Collator) collator.clone();
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      byte[] collationKey = collator.getCollationKey(termAtt.toString()).toByteArray();
      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
          collationKey, 0, collationKey.length);
      termAtt.resizeBuffer(encodedLength);
      termAtt.setLength(encodedLength);
      IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
          termAtt.buffer(), 0, encodedLength);
      return true;
    } else {
      return false;
    }
  }
}
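The migration path named in the deprecation note is the attribute factory. A minimal sketch mirroring what CollationKeyAnalyzer.createComponents now does; the helper class and locale are illustrative:

// CollationFactoryDemo.java - illustrative only.
import java.io.Reader;
import java.text.Collator;
import java.util.Locale;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.collation.CollationAttributeFactory;

public class CollationFactoryDemo {
  // The factory makes the tokenizer emit collation keys directly as term bytes,
  // so no trailing filter is required.
  public static Tokenizer collatedKeywordTokenizer(Reader reader) {
    Collator collator = Collator.getInstance(Locale.GERMAN);
    CollationAttributeFactory factory = new CollationAttributeFactory(collator);
    return new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
  }
}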
@ -1,47 +0,0 @@
package org.apache.lucene.analysis.ar;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

/**
 * Testcase for {@link ArabicLetterTokenizer}
 * @deprecated (3.1) Remove in Lucene 5.0
 */
@Deprecated
public class TestArabicLetterTokenizer extends BaseTokenStreamTestCase {

  public void testArabicLetterTokenizer() throws IOException {
    StringReader reader = new StringReader("1234567890 Tokenizer \ud801\udc1c\u0300test");
    ArabicLetterTokenizer tokenizer = new ArabicLetterTokenizer(Version.LUCENE_31,
        reader);
    assertTokenStreamContents(tokenizer, new String[] {"Tokenizer",
        "\ud801\udc1c\u0300test"});
  }

  public void testArabicLetterTokenizerBWCompat() throws IOException {
    StringReader reader = new StringReader("1234567890 Tokenizer \ud801\udc1c\u0300test");
    ArabicLetterTokenizer tokenizer = new ArabicLetterTokenizer(Version.LUCENE_30,
        reader);
    assertTokenStreamContents(tokenizer, new String[] {"Tokenizer", "\u0300test"});
  }
}
@ -23,6 +23,7 @@ import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

@ -88,7 +89,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
  }

  private void check(final String input, final String expected) throws IOException {
    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    MockTokenizer tokenStream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    ArabicNormalizationFilter filter = new ArabicNormalizationFilter(tokenStream);
    assertTokenStreamContents(filter, new String[]{expected});
  }
@ -23,6 +23,7 @@ import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
@ -121,14 +122,14 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("ساهدهات");
    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
    MockTokenizer tokenStream = new MockTokenizer(new StringReader("ساهدهات"), MockTokenizer.WHITESPACE, false);

    ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerFilter(tokenStream, set));
    assertTokenStreamContents(filter, new String[]{"ساهدهات"});
  }

  private void check(final String input, final String expected) throws IOException {
    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    MockTokenizer tokenStream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
    assertTokenStreamContents(filter, new String[]{expected});
  }
@ -68,7 +68,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
  }

  public void testWithStemExclusionSet() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("строеве");
    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
@ -217,7 +217,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("строеве");
    MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);

@ -1,281 +0,0 @@
package org.apache.lucene.analysis.cjk;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;

/** @deprecated Remove when CJKTokenizer is removed (5.0) */
@Deprecated
public class TestCJKTokenizer extends BaseTokenStreamTestCase {

  class TestToken {
    String termText;
    int start;
    int end;
    String type;
  }

  public TestToken newToken(String termText, int start, int end, int type) {
    TestToken token = new TestToken();
    token.termText = termText;
    token.type = CJKTokenizer.TOKEN_TYPE_NAMES[type];
    token.start = start;
    token.end = end;
    return token;
  }

  public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
    String terms[] = new String[out_tokens.length];
    int startOffsets[] = new int[out_tokens.length];
    int endOffsets[] = new int[out_tokens.length];
    String types[] = new String[out_tokens.length];
    for (int i = 0; i < out_tokens.length; i++) {
      terms[i] = out_tokens[i].termText;
      startOffsets[i] = out_tokens[i].start;
      endOffsets[i] = out_tokens[i].end;
      types[i] = out_tokens[i].type;
    }
    assertAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, null);
  }

  public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
    String terms[] = new String[out_tokens.length];
    int startOffsets[] = new int[out_tokens.length];
    int endOffsets[] = new int[out_tokens.length];
    String types[] = new String[out_tokens.length];
    for (int i = 0; i < out_tokens.length; i++) {
      terms[i] = out_tokens[i].termText;
      startOffsets[i] = out_tokens[i].start;
      endOffsets[i] = out_tokens[i].end;
      types[i] = out_tokens[i].type;
    }
    assertAnalyzesToReuse(analyzer, str, terms, startOffsets, endOffsets, types, null);
  }

  public void testJa1() throws IOException {
    String str = "\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341";

    TestToken[] out_tokens = {
      newToken("\u4e00\u4e8c", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e8c\u4e09", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e09\u56db", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u56db\u4e94", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e94\u516d", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u516d\u4e03", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e03\u516b", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u516b\u4e5d", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e5d\u5341", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  public void testJa2() throws IOException {
    String str = "\u4e00 \u4e8c\u4e09\u56db \u4e94\u516d\u4e03\u516b\u4e5d \u5341";

    TestToken[] out_tokens = {
      newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e8c\u4e09", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e09\u56db", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e94\u516d", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u516d\u4e03", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u4e03\u516b", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u516b\u4e5d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u5341", 12, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  public void testC() throws IOException {
    String str = "abc defgh ijklmn opqrstu vwxy z";

    TestToken[] out_tokens = {
      newToken("abc", 0, 3, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("defgh", 4, 9, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("ijklmn", 10, 16, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("opqrstu", 17, 24, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("vwxy", 25, 29, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("z", 30, 31, CJKTokenizer.SINGLE_TOKEN_TYPE),
    };
    checkCJKToken(str, out_tokens);
  }

  public void testMix() throws IOException {
    String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";

    TestToken[] out_tokens = {
      newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u304b\u304d", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304d\u304f", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304f\u3051", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3051\u3053", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  public void testMix2() throws IOException {
    String str = "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053";

    TestToken[] out_tokens = {
      newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u3093", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u304b\u304d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304d\u304f", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304f\u3051", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3053", 14, 15, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  public void testSingleChar() throws IOException {
    String str = "\u4e00";

    TestToken[] out_tokens = {
      newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE),
    };
    checkCJKToken(str, out_tokens);
  }

  /*
   * Full-width text is normalized to half-width
   */
  public void testFullWidth() throws Exception {
    String str = "Test 1234";
    TestToken[] out_tokens = {
      newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("1234", 5, 9, CJKTokenizer.SINGLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  /*
   * Non-English text (not just CJK) is treated the same as CJK: C1C2 C2C3
   */
  public void testNonIdeographic() throws Exception {
    String str = "\u4e00 روبرت موير";
    TestToken[] out_tokens = {
      newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("رو", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("وب", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("بر", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("رت", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("مو", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("وي", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("ير", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  /*
   * Non-English text with nonletters (non-spacing marks, etc) is treated as C1C2 C2C3,
   * except that words are split around non-letters.
   */
  public void testNonIdeographicNonLetter() throws Exception {
    String str = "\u4e00 رُوبرت موير";
    TestToken[] out_tokens = {
      newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("ر", 2, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("وب", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("بر", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("رت", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("مو", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("وي", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("ير", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKToken(str, out_tokens);
  }

  public void testTokenStream() throws Exception {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
        new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
    String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";

    TestToken[] out_tokens = {
      newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u304b\u304d", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304d\u304f", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304f\u3051", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3051\u3053", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKTokenReusable(analyzer, str, out_tokens);

    str = "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053";
    TestToken[] out_tokens2 = {
      newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u3093", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE),
      newToken("\u304b\u304d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304d\u304f", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u304f\u3051", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3053", 14, 15, CJKTokenizer.DOUBLE_TOKEN_TYPE)
    };
    checkCJKTokenReusable(analyzer, str, out_tokens2);
  }

  /**
   * LUCENE-2207: wrong offset calculated by end()
   */
  public void testFinalOffset() throws IOException {
    checkCJKToken("あい", new TestToken[] {
        newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
    checkCJKToken("あい   ", new TestToken[] {
        newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
    checkCJKToken("test", new TestToken[] {
        newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE) });
    checkCJKToken("test   ", new TestToken[] {
        newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE) });
    checkCJKToken("あいtest", new TestToken[] {
        newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
        newToken("test", 2, 6, CJKTokenizer.SINGLE_TOKEN_TYPE) });
    checkCJKToken("testあい    ", new TestToken[] {
        newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
        newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    checkRandomData(random(), new CJKAnalyzer(Version.LUCENE_30), 10000*RANDOM_MULTIPLIER);
  }
}
@ -1,126 +0,0 @@
package org.apache.lucene.analysis.cn;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;

/** @deprecated Remove this test when ChineseAnalyzer is removed. */
@Deprecated
public class TestChineseTokenizer extends BaseTokenStreamTestCase
{
  public void testOtherLetterOffset() throws IOException
  {
    String s = "a天b";
    ChineseTokenizer tokenizer = new ChineseTokenizer(new StringReader(s));

    int correctStartOffset = 0;
    int correctEndOffset = 1;
    OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
    while (tokenizer.incrementToken()) {
      assertEquals(correctStartOffset, offsetAtt.startOffset());
      assertEquals(correctEndOffset, offsetAtt.endOffset());
      correctStartOffset++;
      correctEndOffset++;
    }
  }

  public void testReusableTokenStream() throws Exception
  {
    Analyzer a = new ChineseAnalyzer();
    assertAnalyzesToReuse(a, "中华人民共和国",
        new String[] { "中", "华", "人", "民", "共", "和", "国" },
        new int[] { 0, 1, 2, 3, 4, 5, 6 },
        new int[] { 1, 2, 3, 4, 5, 6, 7 });
    assertAnalyzesToReuse(a, "北京市",
        new String[] { "北", "京", "市" },
        new int[] { 0, 1, 2 },
        new int[] { 1, 2, 3 });
  }

  /*
   * Analyzer that just uses ChineseTokenizer, not ChineseFilter.
   * Convenience to show the behavior of the tokenizer
   */
  private class JustChineseTokenizerAnalyzer extends Analyzer {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      return new TokenStreamComponents(new ChineseTokenizer(reader));
    }
  }

  /*
   * Analyzer that just uses ChineseFilter, not ChineseTokenizer.
   * Convenience to show the behavior of the filter.
   */
  private class JustChineseFilterAnalyzer extends Analyzer {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
      return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
    }
  }

  /*
   * ChineseTokenizer tokenizes numbers as one token, but they are filtered by ChineseFilter
   */
  public void testNumerics() throws Exception
  {
    Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
    assertAnalyzesTo(justTokenizer, "中1234", new String[] { "中", "1234" });

    // in this case the ChineseAnalyzer (which applies ChineseFilter) will remove the numeric token.
    Analyzer a = new ChineseAnalyzer();
    assertAnalyzesTo(a, "中1234", new String[] { "中" });
  }

  /*
   * ChineseTokenizer tokenizes English similarly to SimpleAnalyzer.
   * It will lowercase terms automatically.
   *
   * ChineseFilter has an English stopword list; it also removes any single-character tokens.
   * The stopword list is case-sensitive.
   */
  public void testEnglish() throws Exception
  {
    Analyzer chinese = new ChineseAnalyzer();
    assertAnalyzesTo(chinese, "This is a Test. b c d",
        new String[] { "test" });

    Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
    assertAnalyzesTo(justTokenizer, "This is a Test. b c d",
        new String[] { "this", "is", "a", "test", "b", "c", "d" });

    Analyzer justFilter = new JustChineseFilterAnalyzer();
    assertAnalyzesTo(justFilter, "This is a Test. b c d",
        new String[] { "This", "Test." });
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    checkRandomData(random(), new ChineseAnalyzer(), 10000*RANDOM_MULTIPLIER);
  }

}
@ -27,7 +27,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;

public class TestAnalyzers extends BaseTokenStreamTestCase {

@ -182,15 +181,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
        "\ud801\udc44test" });
  }

  /** @deprecated (3.1) */
  @Deprecated
  public void testLowerCaseTokenizerBWCompat() throws IOException {
    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30,
        reader);
    assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
  }

  public void testWhitespaceTokenizer() throws IOException {
    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@ -198,16 +188,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
        "\ud801\udc1ctest" });
  }

  /** @deprecated (3.1) */
  @Deprecated
  public void testWhitespaceTokenizerBWCompat() throws IOException {
    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30,
        reader);
    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
        "\ud801\udc1ctest" });
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
@ -5,8 +5,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -15,7 +15,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.Arrays;
@ -137,7 +136,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {

    // 2.4 should not show the bug. But, alas, it's also obsolete,
    // so we check latest released (Robert's gonna break this on 4.0 soon :) )
    a2 = new ClassicAnalyzer(Version.LUCENE_31);
    a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  }

@ -244,7 +243,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
  }

  public void testJava14BWCompatibility() throws Exception {
    ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
  }

@ -272,7 +271,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    IndexReader reader = DirectoryReader.open(dir);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
@ -306,7 +305,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();
    reader = IndexReader.open(dir);
    reader = DirectoryReader.open(dir);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

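Several test hunks in this commit make the same mechanical swap from the removed IndexReader.open to DirectoryReader.open. A sketch of the resulting open/read/close pattern; the helper class is illustrative and assumes the directory already holds an index:

// ReaderOpenDemo.java - illustrative only.
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

public class ReaderOpenDemo {
  // Count the documents containing the given term, 4.0-style.
  public static int docFreq(Directory dir, String field, String text) throws Exception {
    IndexReader reader = DirectoryReader.open(dir);  // replaces IndexReader.open(dir)
    try {
      return reader.docFreq(new Term(field, text));
    } finally {
      reader.close();
    }
  }
}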
@ -26,6 +26,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -58,7 +59,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {

    writer.close();

    reader = IndexReader.open(directory);
    reader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(reader);
  }

@ -95,7 +96,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    IndexReader reader = DirectoryReader.open(dir);
    DocsEnum td = _TestUtil.docs(random(),
                                 reader,
                                 "partnum",
@ -230,16 +230,6 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
    checkOneTerm(a, "壹゙", "壹゙"); // ideographic
    checkOneTerm(a, "아゙", "아゙"); // hangul
  }

  /** @deprecated remove this and sophisticated backwards layer in 5.0 */
  @Deprecated
  public void testCombiningMarksBackwards() throws Exception {
    Analyzer a = new StandardAnalyzer(Version.LUCENE_33);
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
    checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
    checkOneTerm(a, "아゙", "아゙"); // hangul Works
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
@ -209,16 +209,6 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase {
    checkOneTerm(a, "壹゙", "壹゙"); // ideographic
    checkOneTerm(a, "아゙", "아゙"); // hangul
  }

  /** @deprecated remove this and sophisticated backwards layer in 5.0 */
  @Deprecated
  public void testCombiningMarksBackwards() throws Exception {
    Analyzer a = new UAX29URLEmailAnalyzer(Version.LUCENE_33);
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
    checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
    checkOneTerm(a, "아゙", "아゙"); // hangul Works
  }

  public void testBasicEmails() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a,
@ -453,39 +453,6 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
    checkOneTerm(a, "아゙", "아゙"); // hangul
  }

  /** @deprecated remove this and sophisticated backwards layer in 5.0 */
  @Deprecated
  public void testCombiningMarksBackwards() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents
        (String fieldName, Reader reader) {

        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    checkOneTerm(a, "ざ", "さ"); // hiragana Bug
    checkOneTerm(a, "ザ", "ザ"); // katakana Works
    checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
    checkOneTerm(a, "아゙", "아゙"); // hangul Works
  }

  // LUCENE-3880
  /** @deprecated remove this and sophisticated backwards layer in 5.0 */
  @Deprecated
  public void testMailtoBackwards() throws Exception {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
        return new TokenStreamComponents(tokenizer);
      }
    };
    assertAnalyzesTo(a, "mailto:test@example.org",
        new String[] { "mailto:test", "example.org" });
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
@ -31,30 +31,12 @@ import org.apache.lucene.util.Version;
 *
 */
public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
  /**
   * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
   */
  @Deprecated
  public void testStopWordLegacy() throws Exception {
    assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_30), "Pokud mluvime o volnem",
        new String[] { "mluvime", "volnem" });
  }

  public void testStopWord() throws Exception {
    assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem",
        new String[] { "mluvim", "voln" });
  }

  /**
   * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
   */
  @Deprecated
  public void testReusableTokenStreamLegacy() throws Exception {
    Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_30);
    assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
    assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
  public void testReusableTokenStream() throws Exception {
@ -58,10 +57,6 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
    // a/o/u + e is equivalent to the umlaut form
    checkOneTermReuse(a, "Schaltflächen", "schaltflach");
    checkOneTermReuse(a, "Schaltflaechen", "schaltflach");
    // here they are with the old stemmer
    a = new GermanAnalyzer(Version.LUCENE_30);
    checkOneTermReuse(a, "Schaltflächen", "schaltflach");
    checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
  }

  /** blast some random strings through the analyzer */
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

/**
 * A unit test class for verifying the correct operation of the GreekAnalyzer.
@ -47,29 +46,6 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
    assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
        new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
  }

  /**
   * Test the analysis of various Greek strings.
   *
   * @throws Exception in case an error occurs
   * @deprecated (3.1) Remove this test when support for 3.0 is no longer needed
   */
  @Deprecated
  public void testAnalyzerBWCompat() throws Exception {
    Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
    // Verify the correct analysis of capitals and small accented letters
    assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
        new String[] { "μια", "εξαιρετικα", "καλη", "πλουσια", "σειρα", "χαρακτηρων",
        "ελληνικησ", "γλωσσασ" });
    // Verify the correct analysis of small letters with diaeresis and the elimination
    // of punctuation marks
    assertAnalyzesTo(a, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ",
        new String[] { "προιοντα", "πολλαπλεσ", "αναγκεσ" });
    // Verify the correct analysis of capital accented letters and capital letters with diaeresis,
    // as well as the elimination of stop words
    assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
        new String[] { "προυποθεσεισ", "αψογοσ", "μεστοσ", "αλλοι" });
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
@ -23,8 +23,8 @@ import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

/**
@ -58,8 +58,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
  }

  private void check(final String input, final String expected) throws IOException {
    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT,
        new StringReader(input));
    MockTokenizer tokenStream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    PersianNormalizationFilter filter = new PersianNormalizationFilter(
        tokenStream);
    assertTokenStreamContents(filter, new String[]{expected});
@ -115,94 +115,6 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {

  }

  /**
   * @deprecated (3.1) remove this test for Lucene 5.0
   */
  @Deprecated
  public void testAnalyzer30() throws Exception {
    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);

    assertAnalyzesTo(fa, "", new String[] {
    });

    assertAnalyzesTo(
        fa,
        "chien chat cheval",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(
        fa,
        "chien CHAT CHEVAL",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(
        fa,
        " chien ,? + = - CHAT /: > CHEVAL",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(fa, "chien++", new String[] { "chien" });

    assertAnalyzesTo(
        fa,
        "mot \"entreguillemet\"",
        new String[] { "mot", "entreguillemet" });

    // let's do some French specific tests now

    /* 1. couldn't resist
       I would expect this to stay one term as in French the minus
       sign is often used for composing words */
    assertAnalyzesTo(
        fa,
        "Jean-François",
        new String[] { "jean", "françois" });

    // 2. stopwords
    assertAnalyzesTo(
        fa,
        "le la chien les aux chat du des à cheval",
        new String[] { "chien", "chat", "cheval" });

    // some nouns and adjectives
    assertAnalyzesTo(
        fa,
        "lances chismes habitable chiste éléments captifs",
        new String[] {
            "lanc",
            "chism",
            "habit",
            "chist",
            "élément",
            "captif" });

    // some verbs
    assertAnalyzesTo(
        fa,
        "finissions souffrirent rugissante",
        new String[] { "fin", "souffr", "rug" });

    // some everything else
    // aujourd'hui stays one term which is OK
    assertAnalyzesTo(
        fa,
        "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
        new String[] {
            "c3po",
            "aujourd'hui",
            "oeuf",
            "ïâöûàä",
            "anticonstitutionnel",
            "jav" });

    // some more everything else
    // here 1940-1945 stays as one term, 1940:1945 not ?
    assertAnalyzesTo(
        fa,
        "33Bis 1940-1945 1940:1945 (---i+++)*",
        new String[] { "33bis", "1940-1945", "1940", "1945", "i" });

  }

  public void testReusableTokenStream() throws Exception {
    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
    // stopwords
@ -242,22 +154,11 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouil" });
  }

  /**
   * Prior to 3.1, this analyzer had no lowercase filter.
   * Stopwords were case sensitive. Preserve this for back compat.
   * @deprecated (3.1) Remove this test in Lucene 5.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(a, "Votre", new String[] { "votr" });
  }

  /**
   * Test that stopwords are not case sensitive
   */
  public void testStopwordsCasing() throws IOException {
    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
    FrenchAnalyzer a = new FrenchAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(a, "Votre", new String[] { });
  }

@ -63,11 +63,4 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
    assertAnalyzesTo(a, "l'Italiano", new String[] { "italian" });
  }

  /** test that we don't enable this before 3.2 */
  public void testContractionsBackwards() throws IOException {
    Analyzer a = new ItalianAnalyzer(Version.LUCENE_31);
    assertAnalyzesTo(a, "dell'Italia", new String[] { "dell'ital" });
    assertAnalyzesTo(a, "l'Italiano", new String[] { "l'ital" });
  }
}
@@ -1,181 +0,0 @@
package org.apache.lucene.analysis.miscellaneous;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.Arrays;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;

/**
 * Verifies the behavior of PatternAnalyzer.
 */
public class PatternAnalyzerTest extends BaseTokenStreamTestCase {

  /**
   * Test PatternAnalyzer when it is configured with a non-word pattern.
   * Behavior can be similar to SimpleAnalyzer (depending upon options)
   */
  public void testNonWordPattern() throws IOException {
    // Split on non-letter pattern, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
        false, null);
    check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "The", "quick", "brown", "Fox", "the", "abcd", "dc" });

    // split on non-letter pattern, lowercase, english stopwords
    PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
        true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "quick", "brown", "fox", "abcd", "dc" });
  }

  /**
   * Test PatternAnalyzer when it is configured with a whitespace pattern.
   * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
   */
  public void testWhitespacePattern() throws IOException {
    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        false, null);
    check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });

    // Split on whitespace patterns, lowercase, english stopwords
    PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
  }

  /**
   * Test PatternAnalyzer when it is configured with a custom pattern. In this
   * case, text is tokenized on the comma ","
   */
  public void testCustomPattern() throws IOException {
    // Split on comma, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
    check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
        "Are", "some", "Comma", "separated", "words" });

    // split on comma, lowercase, english stopwords
    PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true,
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
        "some", "comma", "separated", "words" });
  }

  /**
   * Test PatternAnalyzer against a large document.
   */
  public void testHugeDocument() throws IOException {
    StringBuilder document = new StringBuilder();
    // 5000 a's
    char largeWord[] = new char[5000];
    Arrays.fill(largeWord, 'a');
    document.append(largeWord);

    // a space
    document.append(' ');

    // 2000 b's
    char largeWord2[] = new char[2000];
    Arrays.fill(largeWord2, 'b');
    document.append(largeWord2);

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        false, null);
    check(a, document.toString(), new String[] { new String(largeWord),
        new String(largeWord2) });
  }

  /**
   * Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
   * several methods are verified:
   * <ul>
   * <li>Analysis with a normal Reader
   * <li>Analysis with a FastStringReader
   * <li>Analysis with a String
   * </ul>
   */
  private void check(PatternAnalyzer analyzer, String document,
      String expected[]) throws IOException {
    // ordinary analysis of a Reader
    assertAnalyzesTo(analyzer, document, expected);

    // analysis with a "FastStringReader"
    TokenStream ts = analyzer.tokenStream("dummy",
        new PatternAnalyzer.FastStringReader(document));
    assertTokenStreamContents(ts, expected);

    // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
    TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
    assertTokenStreamContents(ts2, expected);
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
    final UncaughtExceptionHandler savedHandler = Thread.getDefaultUncaughtExceptionHandler();
    Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
      @Override
      public void uncaughtException(Thread thread, Throwable throwable) {
        assumeTrue("not failing due to jre bug ", !isJREBug7104012(throwable));
        // otherwise its some other bug, pass to default handler
        savedHandler.uncaughtException(thread, throwable);
      }
    });

    try {
      Thread.getDefaultUncaughtExceptionHandler();
      checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
    } catch (ArrayIndexOutOfBoundsException ex) {
      assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
      throw ex; // otherwise rethrow
    } finally {
      Thread.setDefaultUncaughtExceptionHandler(savedHandler);
    }
  }

  static boolean isJREBug7104012(Throwable t) {
    if (!(t instanceof ArrayIndexOutOfBoundsException)) {
      // BaseTokenStreamTestCase now wraps exc in a new RuntimeException:
      t = t.getCause();
      if (!(t instanceof ArrayIndexOutOfBoundsException)) {
        return false;
      }
    }
    StackTraceElement trace[] = t.getStackTrace();
    for (StackTraceElement st : trace) {
      if ("java.text.RuleBasedBreakIterator".equals(st.getClassName())
          && "lookupBackwardState".equals(st.getMethodName())) {
        return true;
      }
    }
    return false;
  }
}

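The testRandomStrings method above uses a pattern worth noting on its own: temporarily swap the JVM's default uncaught-exception handler to tolerate one known JRE bug in background threads, then always restore it. Here is a standalone plain-JDK sketch of that pattern; the isKnownJreBug predicate is a stand-in for the real stack-trace inspection.

public class HandlerSwapSketch {
  static boolean isKnownJreBug(Throwable t) {
    // stand-in predicate; the real check walks the stack trace
    return t instanceof ArrayIndexOutOfBoundsException;
  }

  public static void main(String[] args) {
    final Thread.UncaughtExceptionHandler saved = Thread.getDefaultUncaughtExceptionHandler();
    Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
      @Override
      public void uncaughtException(Thread thread, Throwable throwable) {
        if (isKnownJreBug(throwable)) {
          return; // swallow only the known bug; a test framework would "assume" here
        }
        if (saved != null) {
          saved.uncaughtException(thread, throwable); // everything else stays fatal
        }
      }
    });
    try {
      // ... run work that may crash background threads ...
    } finally {
      Thread.setDefaultUncaughtExceptionHandler(saved); // always restore
    }
  }
}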
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

@@ -61,7 +62,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    IndexReader reader = DirectoryReader.open(dir);
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    reader.close();

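The IndexReader.open to DirectoryReader.open change above repeats across many files in this commit. A minimal sketch of the migration, assuming a Lucene 4.x classpath; the index path is hypothetical.

import java.io.File;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenReaderMigration {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/example-index")); // hypothetical path
    // Before (removed by LUCENE-4095): IndexReader reader = IndexReader.open(dir);
    IndexReader reader = DirectoryReader.open(dir); // the static factory now lives on DirectoryReader
    System.out.println("numDocs=" + reader.numDocs());
    reader.close();
    dir.close();
  }
}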
@@ -112,17 +112,6 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    check("ophouden", "ophoud");
  }

  /**
   * @deprecated (3.1) remove this test in Lucene 5.0
   */
  @Deprecated
  public void testOldBuggyStemmer() throws Exception {
    Analyzer a = new DutchAnalyzer(Version.LUCENE_30);
    checkOneTermReuse(a, "opheffen", "ophef"); // versus snowball 'opheff'
    checkOneTermReuse(a, "opheffende", "ophef"); // versus snowball 'opheff'
    checkOneTermReuse(a, "opheffing", "ophef"); // versus snowball 'opheff'
  }

  public void testSnowballCorrectness() throws Exception {
    Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
    checkOneTermReuse(a, "opheffen", "opheff");

@@ -139,7 +128,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
  }

  public void testExclusionTableViaCtor() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("lichamelijk");
    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
    assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });

@@ -158,33 +147,11 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    checkOneTerm(a, "fiets", "fiets");
  }

  /**
   * prior to 3.6, this confusingly did not happen if
   * you specified your own stoplist!!!!
   * @deprecated (3.6) Remove this test in Lucene 5.0
   */
  @Deprecated
  public void testBuggyStemOverrides() throws IOException {
    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_35, CharArraySet.EMPTY_SET);
    checkOneTerm(a, "fiets", "fiet");
  }

  /**
   * Prior to 3.1, this analyzer had no lowercase filter.
   * stopwords were case sensitive. Preserve this for back compat.
   * @deprecated (3.1) Remove this test in Lucene 5.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(a, "Zelf", new String[] { "zelf" });
  }

  /**
   * Test that stopwords are not case sensitive
   */
  public void testStopwordsCasing() throws IOException {
    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_31);
    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(a, "Zelf", new String[] { });
  }

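The exclusion-table test above shows how a stemming exclusion set protects exact terms from the stemmer. A minimal sketch of that setup, assuming the Lucene 4.x trunk constructors used in the test exist on the classpath; the version constant and words are illustrative.

import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public class ExclusionTableSketch {
  public static DutchAnalyzer build() {
    CharArraySet exclusions = new CharArraySet(Version.LUCENE_40, 1, true);
    exclusions.add("lichamelijk"); // this exact term bypasses the stemmer
    // empty stopword set, one excluded term (ctor mirrors the test above)
    return new DutchAnalyzer(Version.LUCENE_40, CharArraySet.EMPTY_SET, exclusions);
    // "lichamelijk" is emitted unstemmed; "lichamelijke" still stems to "licham"
  }
}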
@@ -20,6 +20,7 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

@@ -53,7 +54,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
      writer.addDocument(doc);
    }
    writer.close();
    reader = IndexReader.open(dir);
    reader = DirectoryReader.open(dir);
  }

  @Override

@@ -57,15 +57,6 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
    assertEquals( "ABEDCF", new String( buffer ) );
  }

  /**
   * Test the broken 3.0 behavior, for back compat
   * @deprecated (3.1) Remove in Lucene 5.0
   */
  @Deprecated
  public void testBackCompat() throws Exception {
    assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse(Version.LUCENE_30, "𩬅𩬅"));
  }

  public void testReverseSupplementary() throws Exception {
    // supplementary at end
    assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅"));

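The removed back-compat test pinned down the pre-3.1 bug: reversing char-by-char splits UTF-16 surrogate pairs, corrupting supplementary characters like 𩬅 (U+29B05). A plain-Java illustration (not Lucene's implementation) of the broken and the codepoint-safe behavior:

public class SurrogateSafeReverse {
  // Naive char-by-char reversal: splits surrogate pairs, reproducing the
  // "broken 3.0 behavior" the removed test asserted.
  static String naiveReverse(String s) {
    char[] chars = s.toCharArray();
    for (int i = 0, j = chars.length - 1; i < j; i++, j--) {
      char tmp = chars[i]; chars[i] = chars[j]; chars[j] = tmp;
    }
    return new String(chars);
  }

  // Codepoint-aware reversal, matching the post-3.1 semantics.
  static String codePointReverse(String s) {
    StringBuilder out = new StringBuilder(s.length());
    for (int i = s.length(); i > 0; ) {
      int cp = s.codePointBefore(i);
      out.appendCodePoint(cp);
      i -= Character.charCount(cp);
    }
    return out.toString();
  }

  public static void main(String[] args) {
    String two = "\uD866\uDF05\uD866\uDF05"; // "𩬅𩬅": two supplementary characters
    System.out.println(naiveReverse(two).equals("\uDF05\uD866\uDF05\uD866")); // true: pairs split
    System.out.println(codePointReverse(two).equals(two)); // true: codepoint palindrome preserved
  }
}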
@@ -37,16 +37,6 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
  }

  /** @deprecated (3.1) remove this test in Lucene 5.0: stopwords changed */
  @Deprecated
  public void testReusableTokenStream30() throws Exception {
    Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
    assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
        new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
    assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
        new String[] { "знан", "хран", "тайн" });
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",

@@ -1,47 +0,0 @@
package org.apache.lucene.analysis.ru;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

/**
 * Testcase for {@link RussianLetterTokenizer}
 * @deprecated (3.1) Remove this test class in Lucene 5.0
 */
@Deprecated
public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {

  public void testRussianLetterTokenizer() throws IOException {
    StringReader reader = new StringReader("1234567890 Вместе \ud801\udc1ctest");
    RussianLetterTokenizer tokenizer = new RussianLetterTokenizer(Version.LUCENE_CURRENT,
        reader);
    assertTokenStreamContents(tokenizer, new String[] {"1234567890", "Вместе",
        "\ud801\udc1ctest"});
  }

  public void testRussianLetterTokenizerBWCompat() throws IOException {
    StringReader reader = new StringReader("1234567890 Вместе \ud801\udc1ctest");
    RussianLetterTokenizer tokenizer = new RussianLetterTokenizer(Version.LUCENE_30,
        reader);
    assertTokenStreamContents(tokenizer, new String[] {"1234567890", "Вместе", "test"});
  }
}

@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

@@ -74,7 +75,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {

    writer.close();

    reader = IndexReader.open(directory);
    reader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(reader);
  }

@@ -29,6 +29,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;

@@ -103,7 +104,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir);
    IndexReader r = DirectoryReader.open(dir);
    Terms vector = r.getTermVectors(0).terms("field");
    assertEquals(1, vector.size());
    TermsEnum termsEnum = vector.iterator(null);

@@ -22,6 +22,7 @@ import java.io.Reader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;

@@ -38,65 +39,18 @@ import org.apache.lucene.util.Version;
public class TestSnowball extends BaseTokenStreamTestCase {

  public void testEnglish() throws Exception {
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, "English"));
      }
    };

    assertAnalyzesTo(a, "he abhorred accents",
        new String[]{"he", "abhor", "accent"});
  }

  public void testStopwords() throws Exception {
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English",
        StandardAnalyzer.STOP_WORDS_SET);
    assertAnalyzesTo(a, "the quick brown fox jumped",
        new String[]{"quick", "brown", "fox", "jump"});
  }

  /**
   * Test english lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure
   * we lowercase I correct for non-Turkish languages in either case.
   */
  public void testEnglishLowerCase() throws Exception {
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" });
    assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" });

    Analyzer b = new SnowballAnalyzer(Version.LUCENE_30, "English");
    assertAnalyzesTo(b, "cryogenic", new String[] { "cryogen" });
    assertAnalyzesTo(b, "CRYOGENIC", new String[] { "cryogen" });
  }

  /**
   * Test turkish lowercasing
   */
  public void testTurkish() throws Exception {
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");

    assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
    assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
  }

  /**
   * Test turkish lowercasing (old buggy behavior)
   * @deprecated (3.1) Remove this when support for 3.0 indexes is no longer required (5.0)
   */
  @Deprecated
  public void testTurkishBWComp() throws Exception {
    Analyzer a = new SnowballAnalyzer(Version.LUCENE_30, "Turkish");
    // AĞACI in turkish lowercases to ağacı, but with lowercase filter ağaci.
    // this fails due to wrong casing, because the stemmer
    // will only remove -ı, not -i
    assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
    assertAnalyzesTo(a, "AĞACI", new String[] { "ağaci" });
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    assertAnalyzesToReuse(a, "he abhorred accents",
        new String[]{"he", "abhor", "accent"});
    assertAnalyzesToReuse(a, "she abhorred him",
        new String[]{"she", "abhor", "him"});
  }

  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);

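The testEnglish change above shows the replacement pattern for the removed SnowballAnalyzer: build an inline Analyzer from a tokenizer plus SnowballFilter. A sketch of the same pattern for production code, swapping the test-only MockTokenizer for WhitespaceTokenizer and adding the lowercasing the packaged analyzer used to provide; assumes a Lucene 4.x classpath, and the version constant is illustrative.

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.util.Version;

public class InlineSnowballAnalyzer {
  public static Analyzer english() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40, reader);
        // lowercase first so the stemmer sees normalized input
        return new TokenStreamComponents(source,
            new SnowballFilter(new LowerCaseFilter(Version.LUCENE_40, source), "English"));
      }
    };
  }
}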
@@ -62,13 +62,6 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
        new int[] { 5, 2, 1 });
  }

  public void testBackwardsStopWords() throws Exception {
    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_35), "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
        new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
        new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
  }

  public void testTokenType() throws Exception {
    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },

@@ -79,43 +72,6 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
        "<NUM>" });
  }

  /**
   * Thai numeric tokens were typed as <ALPHANUM> instead of <NUM>.
   * @deprecated (3.1) testing backwards behavior
   */
  @Deprecated
  public void testBuggyTokenType30() throws Exception {
    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_30), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
  }

  /** @deprecated (3.1) testing backwards behavior */
  @Deprecated
  public void testAnalyzer30() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

    assertAnalyzesTo(analyzer, "", new String[] {});

    assertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

    assertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });

    // English stop words
    assertAnalyzesTo(
        analyzer,
        "ประโยคว่า The quick brown fox jumped over the lazy dogs",
        new String[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" });
  }

  /*
   * Test that position increments are adjusted correctly for stopwords.
   */

@@ -151,23 +107,6 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
        new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
  }

  /** @deprecated (3.1) for version back compat */
  @Deprecated
  public void testReusableTokenStream30() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
    assertAnalyzesToReuse(analyzer, "", new String[] {});

    assertAnalyzesToReuse(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

    assertAnalyzesToReuse(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    checkRandomData(random(), new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);

@@ -181,7 +120,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {

  // LUCENE-3044
  public void testAttributeReuse() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
    // just consume
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("ภาษาไทย"));
    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });

@@ -250,77 +250,6 @@ public class TestCharArraySet extends LuceneTestCase {
    }
  }

  /**
   * @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is
   * no longer needed.
   */
  @Deprecated
  public void testSupplementaryCharsBWCompat() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    String[] upperArr = new String[] {"Abc\ud801\udc1c",
        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
    String[] lowerArr = new String[] {"abc\ud801\udc44",
        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }

  /**
   * @deprecated (3.1) remove this test when lucene 3.0 "broken unicode 4" support is
   * no longer needed.
   */
  @Deprecated
  public void testSingleHighSurrogateBWComapt() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
        "\uD800EfG", "\uD800\ud801\udc1cB" };

    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
        "\uD800efg", "\uD800\ud801\udc44b" };
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays
        .asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      if (i == lowerArr.length - 1)
        assertFalse(String.format(falsePos, lowerArr[i]), set
            .contains(lowerArr[i]));
      else
        assertTrue(String.format(missing, lowerArr[i]), set
            .contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS),
        false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set
          .contains(lowerArr[i]));
    }
  }

  @SuppressWarnings("deprecated")
  public void testCopyCharArraySetBWCompat() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

@@ -499,10 +428,5 @@ public class TestCharArraySet extends LuceneTestCase {
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));

    set = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
  }
}

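The removed tests above exercised CharArraySet's case folding over supplementary characters. A minimal sketch of ordinary case-insensitive CharArraySet matching, assuming a Lucene 4.x trunk classpath; the words and version constant are illustrative.

import java.util.Arrays;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public class CharArraySetSketch {
  public static void main(String[] args) {
    CharArraySet set = new CharArraySet(Version.LUCENE_40,
        Arrays.asList("stop", "words"), /*ignoreCase=*/ true);
    System.out.println(set.contains("STOP"));  // true: folded at lookup time
    System.out.println(set.contains("Words")); // true
    System.out.println(set.contains("other")); // false
  }
}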
@@ -33,7 +33,7 @@ public class TestCharacterUtils extends LuceneTestCase {

  @Test
  public void testCodePointAtCharArrayInt() {
    CharacterUtils java4 = CharacterUtils.getInstance(Version.LUCENE_30);
    CharacterUtils java4 = CharacterUtils.getJava4Instance();
    char[] cpAt3 = "Abc\ud801\udc1c".toCharArray();
    char[] highSurrogateAt3 = "Abc\ud801".toCharArray();
    assertEquals((int) 'A', java4.codePointAt(cpAt3, 0));

@@ -59,7 +59,7 @@ public class TestCharacterUtils extends LuceneTestCase {

  @Test
  public void testCodePointAtCharSequenceInt() {
    CharacterUtils java4 = CharacterUtils.getInstance(Version.LUCENE_30);
    CharacterUtils java4 = CharacterUtils.getJava4Instance();
    String cpAt3 = "Abc\ud801\udc1c";
    String highSurrogateAt3 = "Abc\ud801";
    assertEquals((int) 'A', java4.codePointAt(cpAt3, 0));

@@ -86,7 +86,7 @@ public class TestCharacterUtils extends LuceneTestCase {

  @Test
  public void testCodePointAtCharArrayIntInt() {
    CharacterUtils java4 = CharacterUtils.getInstance(Version.LUCENE_30);
    CharacterUtils java4 = CharacterUtils.getJava4Instance();
    char[] cpAt3 = "Abc\ud801\udc1c".toCharArray();
    char[] highSurrogateAt3 = "Abc\ud801".toCharArray();
    assertEquals((int) 'A', java4.codePointAt(cpAt3, 0, 2));

@@ -122,9 +122,10 @@ public class TestCharacterUtils extends LuceneTestCase {

  @Test
  public void testFillNoHighSurrogate() throws IOException {
    Version[] versions = new Version[] { Version.LUCENE_30, TEST_VERSION_CURRENT };
    for (Version version : versions) {
      CharacterUtils instance = CharacterUtils.getInstance(version);
    CharacterUtils versions[] = new CharacterUtils[] {
        CharacterUtils.getInstance(TEST_VERSION_CURRENT),
        CharacterUtils.getJava4Instance() };
    for (CharacterUtils instance : versions) {
      Reader reader = new StringReader("helloworld");
      CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
      assertTrue(instance.fill(buffer,reader));

@@ -172,7 +173,7 @@ public class TestCharacterUtils extends LuceneTestCase {
  @Test
  public void testFillJava14() throws IOException {
    String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
    CharacterUtils instance = CharacterUtils.getInstance(Version.LUCENE_30);
    CharacterUtils instance = CharacterUtils.getJava4Instance();
    Reader reader = new StringReader(input);
    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
    assertTrue(instance.fill(buffer, reader));

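The change above replaces version-based dispatch with an explicit getJava4Instance(). The distinction it names is purely about UTF-16: pre-Java-5 semantics treat each 16-bit unit as its own "character", while Java 5+ combines surrogate pairs into one supplementary codepoint. A plain-JDK illustration:

public class CodePointSemantics {
  public static void main(String[] args) {
    String s = "Abc\ud801\udc1c"; // 'A','b','c' plus one supplementary character
    // Java 5+ semantics (what the current CharacterUtils instance uses):
    System.out.println(s.codePointAt(3));  // 66588 (0x1041C), the full codepoint
    // Java 4 semantics (what the deprecated LUCENE_30 instance emulated):
    System.out.println((int) s.charAt(3)); // 55297 (0xD801), just the high surrogate
  }
}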
@@ -1,102 +0,0 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;

import java.text.Collator;
import java.util.Locale;
import java.io.Reader;

/**
 * @deprecated remove when CollationKeyFilter is removed.
 */
@Deprecated
public class TestCollationKeyFilter extends CollationTestBase {
  // the sort order of Ø versus U depends on the version of the rules being used
  // for the inherited root locale: Ø's order isnt specified in Locale.US since
  // its not used in english.
  boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;

  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
  // characters properly.
  private Collator collator = Collator.getInstance(new Locale("ar"));
  private Analyzer analyzer = new TestAnalyzer(collator);

  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
      (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
      (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
      (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
      (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));


  public final class TestAnalyzer extends Analyzer {
    private Collator _collator;

    TestAnalyzer(Collator collator) {
      _collator = collator;
    }

    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer result = new KeywordTokenizer(reader);
      return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
    }
  }

  public void testFarsiRangeFilterCollating() throws Exception {
    testFarsiRangeFilterCollating
        (analyzer, firstRangeBeginning, firstRangeEnd,
         secondRangeBeginning, secondRangeEnd);
  }

  public void testFarsiRangeQueryCollating() throws Exception {
    testFarsiRangeQueryCollating
        (analyzer, firstRangeBeginning, firstRangeEnd,
         secondRangeBeginning, secondRangeEnd);
  }

  public void testFarsiTermRangeQuery() throws Exception {
    testFarsiTermRangeQuery
        (analyzer, firstRangeBeginning, firstRangeEnd,
         secondRangeBeginning, secondRangeEnd);
  }

  public void testCollationKeySort() throws Exception {
    Analyzer usAnalyzer = new TestAnalyzer(Collator.getInstance(Locale.US));
    Analyzer franceAnalyzer
        = new TestAnalyzer(Collator.getInstance(Locale.FRANCE));
    Analyzer swedenAnalyzer
        = new TestAnalyzer(Collator.getInstance(new Locale("sv", "se")));
    Analyzer denmarkAnalyzer
        = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));

    // The ICU Collator and Sun java.text.Collator implementations differ in their
    // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
    testCollationKeySort
        (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
         oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
  }
}

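The deleted test rests on locale-sensitive collation: a Collator turns text into opaque binary sort keys that order correctly for a given locale, provided index and query use the identical collator settings. A plain-JDK sketch of the mechanism; the locale and strings are illustrative.

import java.text.Collator;
import java.util.Locale;

public class CollatorSketch {
  public static void main(String[] args) {
    Collator french = Collator.getInstance(Locale.FRANCE);
    french.setStrength(Collator.PRIMARY); // ignore case and accent differences
    System.out.println(french.compare("péché", "PECHE") == 0); // true at PRIMARY strength
    // Keys built by the same collator (same version and strength) sort consistently:
    byte[] key = french.getCollationKey("péché").toByteArray();
    System.out.println(key.length > 0); // opaque, binary-comparable sort key
  }
}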
@@ -22,18 +22,16 @@ import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.collation.CollationKeyAnalyzer; // javadocs
import org.apache.lucene.util.IndexableBinaryStringTools; // javadocs
import org.apache.lucene.util.Version;

import java.io.Reader;

/**
 * <p>
 *   Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}.
 *   Configures {@link KeywordTokenizer} with {@link ICUCollationAttributeFactory}.
 * <p>
 *   Converts the token into its {@link com.ibm.icu.text.CollationKey}, and
 *   then encodes the CollationKey either directly or with
 *   {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow it to
 *   then encodes the CollationKey directly to allow it to
 *   be stored as an index term.
 * </p>
 * <p>

@@ -67,48 +65,24 @@ import java.io.Reader;
 * generation timing and key length comparisons between ICU4J and
 * java.text.Collator over several languages.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating ICUCollationKeyAnalyzer:
 * <ul>
 *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
 *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
 * </ul>
 */
public final class ICUCollationKeyAnalyzer extends Analyzer {
  private final Collator collator;
  private final ICUCollationAttributeFactory factory;
  private final Version matchVersion;

  /**
   * Create a new ICUCollationKeyAnalyzer, using the specified collator.
   *
   * @param matchVersion See <a href="#version">above</a>
   * @param matchVersion compatibility version
   * @param collator CollationKey generator
   */
  public ICUCollationKeyAnalyzer(Version matchVersion, Collator collator) {
    this.matchVersion = matchVersion;
    this.collator = collator;
    this.factory = new ICUCollationAttributeFactory(collator);
  }

  /**
   * @deprecated Use {@link ICUCollationKeyAnalyzer#ICUCollationKeyAnalyzer(Version, Collator)}
   *   and specify a version instead. This ctor will be removed in Lucene 5.0
   */
  @Deprecated
  public ICUCollationKeyAnalyzer(Collator collator) {
    this(Version.LUCENE_31, collator);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      return new TokenStreamComponents(tokenizer, tokenizer);
    } else {
      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new ICUCollationKeyFilter(tokenizer, collator));
    }
    KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
    return new TokenStreamComponents(tokenizer, tokenizer);
  }
}

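After this change the analyzer is a KeywordTokenizer whose attribute factory encodes each term as raw collation-key bytes, with no filter in the chain. A hedged usage sketch, assuming Lucene's analyzers-icu module and ICU4J on the classpath; the locale keyword is illustrative.

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.collation.ICUCollationKeyAnalyzer;
import org.apache.lucene.util.Version;

public class ICUCollationSketch {
  public static Analyzer germanPhonebook() {
    // One collator per field; index and query must use identical settings.
    Collator collator = Collator.getInstance(new ULocale("de@collation=phonebook"));
    return new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
  }
}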
@@ -1,114 +0,0 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;

import org.apache.lucene.collation.CollationKeyFilter; // javadocs

import java.io.IOException;


/**
 * <p>
 *   Converts each token into its {@link com.ibm.icu.text.CollationKey}, and
 *   then encodes the CollationKey with {@link IndexableBinaryStringTools}, to
 *   allow it to be stored as an index term.
 * </p>
 * <p>
 *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
 *   index and query time -- CollationKeys are only comparable when produced by
 *   the same Collator.  {@link com.ibm.icu.text.RuleBasedCollator}s are
 *   independently versioned, so it is safe to search against stored
 *   CollationKeys if the following are exactly the same (best practice is
 *   to store this information with the index and check that they remain the
 *   same at query time):
 * </p>
 * <ol>
 *   <li>
 *     Collator version - see {@link Collator#getVersion()}
 *   </li>
 *   <li>
 *     The collation strength used - see {@link Collator#setStrength(int)}
 *   </li>
 * </ol>
 * <p>
 *   CollationKeys generated by ICU Collators are not compatible with those
 *   generated by java.text.Collators.  Specifically, if you use
 *   ICUCollationKeyFilter to generate index terms, do not use
 *   {@link CollationKeyFilter} on the query side, or vice versa.
 * </p>
 * <p>
 *   ICUCollationKeyFilter is significantly faster and generates significantly
 *   shorter keys than CollationKeyFilter.  See
 *   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
 *   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
 *   generation timing and key length comparisons between ICU4J and
 *   java.text.Collator over several languages.
 * </p>
 * @deprecated Use {@link ICUCollationAttributeFactory} instead, which encodes
 *   terms directly as bytes. This filter will be removed in Lucene 5.0
 */
@Deprecated
public final class ICUCollationKeyFilter extends TokenFilter {
  private Collator collator = null;
  private RawCollationKey reusableKey = new RawCollationKey();
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  /**
   *
   * @param input Source token stream
   * @param collator CollationKey generator
   */
  public ICUCollationKeyFilter(TokenStream input, Collator collator) {
    super(input);
    // clone the collator: see http://userguide.icu-project.org/collation/architecture
    try {
      this.collator = (Collator) collator.clone();
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      char[] termBuffer = termAtt.buffer();
      String termText = new String(termBuffer, 0, termAtt.length());
      collator.getRawCollationKey(termText, reusableKey);
      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
          reusableKey.bytes, 0, reusableKey.size);
      if (encodedLength > termBuffer.length) {
        termAtt.resizeBuffer(encodedLength);
      }
      termAtt.setLength(encodedLength);
      IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
          termAtt.buffer(), 0, encodedLength);
      return true;
    } else {
      return false;
    }
  }
}

@@ -1,98 +0,0 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import com.ibm.icu.text.Collator;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;

import java.io.Reader;
import java.util.Locale;

/** @deprecated remove this when ICUCollationKeyFilter is removed */
@Deprecated
public class TestICUCollationKeyFilter extends CollationTestBase {

  private Collator collator = Collator.getInstance(new Locale("fa"));
  private Analyzer analyzer = new TestAnalyzer(collator);

  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
      (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
      (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
      (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
      (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));


  public final class TestAnalyzer extends Analyzer {
    private Collator _collator;

    TestAnalyzer(Collator collator) {
      _collator = collator;
    }

    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer result = new KeywordTokenizer(reader);
      return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
    }
  }

  public void testFarsiRangeFilterCollating() throws Exception {
    testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd,
        secondRangeBeginning, secondRangeEnd);
  }

  public void testFarsiRangeQueryCollating() throws Exception {
    testFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd,
        secondRangeBeginning, secondRangeEnd);
  }

  public void testFarsiTermRangeQuery() throws Exception {
    testFarsiTermRangeQuery
        (analyzer, firstRangeBeginning, firstRangeEnd,
         secondRangeBeginning, secondRangeEnd);
  }

  // Test using various international locales with accented characters (which
  // sort differently depending on locale)
  //
  // Copied (and slightly modified) from
  // org.apache.lucene.search.TestSort.testInternationalSort()
  //
  public void testCollationKeySort() throws Exception {
    Analyzer usAnalyzer = new TestAnalyzer(Collator.getInstance(Locale.US));
    Analyzer franceAnalyzer
        = new TestAnalyzer(Collator.getInstance(Locale.FRANCE));
    Analyzer swedenAnalyzer
        = new TestAnalyzer(Collator.getInstance(new Locale("sv", "se")));
    Analyzer denmarkAnalyzer
        = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));

    // The ICU Collator and java.text.Collator implementations differ in their
    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
    testCollationKeySort
        (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
         "BFJHD", "ECAGI", "BJDFH", "BJDHF");
  }
}

@@ -60,7 +60,7 @@ public class NearRealtimeReaderTask extends PerfTask {
    }

    long t = System.currentTimeMillis();
    DirectoryReader r = IndexReader.open(w, true);
    DirectoryReader r = DirectoryReader.open(w, true);
    runData.setIndexReader(r);
    // Transfer our reference to runData
    r.decRef();

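This hunk is the near-real-time variant of the same migration: the reader is opened directly from a live IndexWriter, so it sees uncommitted changes. A hedged sketch, assuming a Lucene 4.x classpath; the analyzer, directory, and version constant are illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NrtReaderSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    // true = apply deletes; no commit/reopen cycle needed to see new documents
    DirectoryReader r = DirectoryReader.open(w, true);
    System.out.println("numDocs=" + r.numDocs());
    r.close();
    w.close();
    dir.close();
  }
}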
@@ -45,9 +45,9 @@ public class OpenReaderTask extends PerfTask {
    Directory dir = getRunData().getDirectory();
    DirectoryReader r = null;
    if (commitUserData != null) {
      r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData));
      r = DirectoryReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData));
    } else {
      r = IndexReader.open(dir);
      r = DirectoryReader.open(dir);
    }
    getRunData().setIndexReader(r);
    // We transfer reference to the run data

@@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 */

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;

@@ -47,9 +48,9 @@ public class PrintReaderTask extends PerfTask {
    Directory dir = getRunData().getDirectory();
    IndexReader r = null;
    if (userData == null)
      r = IndexReader.open(dir);
      r = DirectoryReader.open(dir);
    else
      r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, userData));
      r = DirectoryReader.open(OpenReaderTask.findIndexCommit(dir, userData));
    System.out.println("--> numDocs:"+r.numDocs()+" dels:"+r.numDeletedDocs());
    r.close();
    return 1;

@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;

@@ -84,7 +85,7 @@ public abstract class ReadTask extends PerfTask {
    if (searcher == null) {
      // open our own reader
      Directory dir = getRunData().getDirectory();
      reader = IndexReader.open(dir);
      reader = DirectoryReader.open(dir);
      searcher = new IndexSearcher(reader);
      closeSearcher = true;
    } else {

@@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.quality.trec;
import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.benchmark.quality.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

@@ -53,7 +54,7 @@ public class QueryDriver {
    SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
    FSDirectory dir = FSDirectory.open(new File(args[3]));
    String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
    IndexReader reader = IndexReader.open(dir);
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    int maxResults = 1000;

@@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality.utils;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;

@@ -86,7 +87,7 @@ public class QualityQueriesFinder {

  private String [] bestTerms(String field,int numTerms) throws IOException {
    PriorityQueue<TermDf> pq = new TermsDfQueue(numTerms);
    IndexReader ir = IndexReader.open(dir);
    IndexReader ir = DirectoryReader.open(dir);
    try {
      int threshold = ir.maxDoc() / 10; // ignore words too common.
      Terms terms = MultiFields.getTerms(ir, field);