LUCENE-5859: Add back the previously removed Version (matchVersion) parameters on analysis components, restoring code considered dead, in response to review objections

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1614852 13f79535-47bb-0310-9956-ffa450edef68
2014-07-31 10:44:39 +00:00 · 2014-07-31 10:44:39 +00:00 · e6d29d223b
parent 0f8f76ce6a
commit e6d29d223b
261 changed files with 1840 additions and 1248 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Arabic. 
@ -88,18 +89,20 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public ArabicAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public ArabicAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public ArabicAnalyzer(CharArraySet stopwords){
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -107,14 +110,17 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * {@link ArabicStemFilter}.
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   * @param stemExclusionSet
   *          a set of terms not to be stemmed
   */
-  public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -130,10 +136,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new LowerCaseFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
    // the order here is important: the stopword list is not normalized!
-    result = new StopFilter(result, stopwords);
+    result = new StopFilter( matchVersion, result, stopwords);
    // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
    result = new ArabicNormalizationFilter(result);
    if(!stemExclusionSet.isEmpty()) {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.bg;

 import java.io.IOException;
 import java.io.Reader;
+import java.util.Set;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -30,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Bulgarian.
@ -40,7 +42,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 * <p>
 */
 public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
-
  /**
   * File containing default Bulgarian stopwords.
   * 
@ -83,15 +84,15 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   * Builds an analyzer with the default stop words:
   * {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public BulgarianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public BulgarianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   */
-  public BulgarianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  
  /**
@ -99,10 +100,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   * If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter} 
   * before {@link BulgarianStemFilter}.
   */
-  public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));  
-  }
+  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));  }

  /**
   * Creates a
@ -118,10 +119,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  public TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new BulgarianStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@ -65,7 +65,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -83,29 +83,35 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
   */
-  public BrazilianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public BrazilianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public BrazilianAnalyzer(CharArraySet stopwords) {
-     super(stopwords);
+  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+     super(matchVersion, stopwords);
  }

  /**
   * Builds an analyzer with the given stop words and stemming exclusion words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    this(stopwords);
-    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
+      CharArraySet stemExclusionSet) {
+    this(matchVersion, stopwords);
+    excltable = CharArraySet.unmodifiableSet(CharArraySet
+        .copy(matchVersion, stemExclusionSet));
  }

  /**
@ -120,10 +126,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    Tokenizer source = new StandardTokenizer();
-    TokenStream result = new LowerCaseFilter(source);
-    result = new StandardFilter(result);
-    result = new StopFilter(result, stopwords);
+    Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
+    result = new StandardFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(excltable != null && !excltable.isEmpty())
      result = new SetKeywordMarkerFilter(result, excltable);
    return new TokenStreamComponents(source, new BrazilianStemFilter(result));
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.CatalanStemmer;

 /**
@ -45,7 +46,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  
  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
-      new CharArraySet(
+      new CharArraySet(Version.LUCENE_CURRENT, 
          Arrays.asList(
              "d", "l", "m", "n", "s", "t"
          ), true));
@ -80,17 +81,18 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public CatalanAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public CatalanAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public CatalanAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -98,12 +100,14 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -120,11 +124,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
    result = new ElisionFilter(result, DEFAULT_ARTICLES);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new CatalanStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@ -26,6 +26,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;

+import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.OpenStringBuilder;
@ -29840,7 +29841,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
    upperCaseVariantsAccepted.put("amp", "AMP");
  }
  private static final CharArrayMap<Character> entityValues
-      = new CharArrayMap<>(253, false);
+      = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false);
  static {
    String[] entities = {
      "AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2",
@ -29979,7 +29980,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
          escapeSTYLE = true;
        } else {
          if (null == this.escapedTags) {
-            this.escapedTags = new CharArraySet(16, true);
+            this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
          }
          this.escapedTags.add(tag);
        }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;

+import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.OpenStringBuilder;
@ -194,7 +195,7 @@ InlineElment = ( [aAbBiIqQsSuU]                   |
          escapeSTYLE = true;
        } else {
          if (null == this.escapedTags) {
-            this.escapedTags = new CharArraySet(16, true);
+            this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
          }
          this.escapedTags.add(tag);
        }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
 */

 import java.io.IOException;
+import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@ -27,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;

 /**
 * An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@ -35,7 +37,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 * and filters stopwords with {@link StopFilter}
 */
 public final class CJKAnalyzer extends StopwordAnalyzerBase {
-
  /**
   * File containing default CJK stopwords.
   * <p/>
@ -69,27 +70,29 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
   */
-  public CJKAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public CJKAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public CJKAnalyzer(CharArraySet stopwords){
-    super(stopwords);
+  public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
+    super(matchVersion, stopwords);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
+    final Tokenizer source = new StandardTokenizer(matchVersion);
    // run the widthfilter first before bigramming, it sometimes combines characters.
    TokenStream result = new CJKWidthFilter(source);
-    result = new LowerCaseFilter(result);
+    result = new LowerCaseFilter(matchVersion, result);
    result = new CJKBigramFilter(result);
-    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Sorani Kurdish.
@ -61,7 +62,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -73,17 +74,18 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public SoraniAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public SoraniAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public SoraniAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -91,12 +93,14 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -114,11 +118,11 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
    result = new SoraniNormalizationFilter(result);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SoraniStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
@ -78,7 +78,7 @@ public final class CommonGramsFilter extends TokenFilter {
   * @param input TokenStream input in filter chain
   * @param commonWords The set of common words.
   */
-  public CommonGramsFilter(TokenStream input, CharArraySet commonWords) {
+  public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
    super(input);
    this.commonWords = commonWords;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
@ -76,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso

  @Override
  public TokenFilter create(TokenStream input) {
-    CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
+    CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
    return commonGrams;
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.core;
 * limitations under the License.
 */

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;

 /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@ -18,11 +18,13 @@ package org.apache.lucene.analysis.core;
 */

 import java.io.IOException;
+import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;

 /**
 * Emits the entire input as a single token.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;

+import java.io.Reader;
 import java.util.Map;

 /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;

 /**
 * A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
@ -29,25 +30,41 @@ import org.apache.lucene.util.AttributeFactory;
 * Note: this does a decent job for most European languages, but does a terrible
 * job for some Asian languages, where words are not separated by spaces.
 * </p>
+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LetterTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * </p>
 */

 public class LetterTokenizer extends CharTokenizer {
  
  /**
   * Construct a new LetterTokenizer.
+   * 
+   * @param matchVersion
+   *          Lucene version to match. See <a href="#version">above</a>.
   */
-  public LetterTokenizer() {
+  public LetterTokenizer(Version matchVersion) {
+    super(matchVersion);
  }
  
  /**
   * Construct a new LetterTokenizer using a given
   * {@link org.apache.lucene.util.AttributeFactory}.
   * 
+   * @param matchVersion
+   *          Lucene version to match. See <a href="#version">above</a>.
   * @param factory
   *          the attribute factory to use for this {@link Tokenizer}
   */
-  public LetterTokenizer(AttributeFactory factory) {
-    super(factory);
+  public LetterTokenizer(Version matchVersion, AttributeFactory factory) {
+    super(matchVersion, factory);
  }
  
  /** Collects only characters which satisfy
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
@ -36,6 +36,7 @@ public class LetterTokenizerFactory extends TokenizerFactory {
  /** Creates a new LetterTokenizerFactory */
  public LetterTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -43,6 +44,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {

  @Override
  public LetterTokenizer create(AttributeFactory factory) {
-    return new LetterTokenizer(factory);
+    return new LetterTokenizer(luceneMatchVersion, factory);
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@ -23,21 +23,30 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;

 /**
 * Normalizes token text to lower case.
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating LowerCaseFilter:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
 */
 public final class LowerCaseFilter extends TokenFilter {
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
+  private final CharacterUtils charUtils;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  
  /**
   * Create a new LowerCaseFilter, that normalizes token text to lower case.
   * 
+   * @param matchVersion See <a href="#version">above</a>
   * @param in TokenStream to filter
   */
-  public LowerCaseFilter(TokenStream in) {
+  public LowerCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
+    charUtils = CharacterUtils.getInstance(matchVersion);
  }
  
  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
@ -40,6 +40,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT
  /** Creates a new LowerCaseFilterFactory */
  public LowerCaseFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -47,7 +48,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT

  @Override
  public LowerCaseFilter create(TokenStream input) {
-    return new LowerCaseFilter(input);
+    return new LowerCaseFilter(luceneMatchVersion,input);
  }

  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@ -17,8 +17,13 @@ package org.apache.lucene.analysis.core;
 * limitations under the License.
 */

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /**
 * LowerCaseTokenizer performs the function of LetterTokenizer
@ -30,24 +35,41 @@ import org.apache.lucene.util.AttributeFactory;
 * Note: this does a decent job for most European languages, but does a terrible
 * job for some Asian languages, where words are not separated by spaces.
 * </p>
+ * <p>
+ * <a name="version"/>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link LowerCaseTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * </p>
 */
 public final class LowerCaseTokenizer extends LetterTokenizer {
  
  /**
   * Construct a new LowerCaseTokenizer.
+   * 
+   * @param matchVersion
+   *          Lucene version to match. See <a href="#version">above</a>.
+   * 
   */
-  public LowerCaseTokenizer() {
+  public LowerCaseTokenizer(Version matchVersion) {
+    super(matchVersion);
  }

  /**
   * Construct a new LowerCaseTokenizer using a given
   * {@link org.apache.lucene.util.AttributeFactory}.
   *
+   * @param matchVersion
+   *          Lucene version to match. See <a href="#version">above</a>.
   * @param factory
   *          the attribute factory to use for this {@link Tokenizer}
   */
-  public LowerCaseTokenizer(AttributeFactory factory) {
-    super(factory);
+  public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory) {
+    super(matchVersion, factory);
  }
  
  /** Converts char to lower case
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
@ -39,6 +39,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
  /** Creates a new LowerCaseTokenizerFactory */
  public LowerCaseTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -46,7 +47,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi

  @Override
  public LowerCaseTokenizer create(AttributeFactory factory) {
-    return new LowerCaseTokenizer(factory);
+    return new LowerCaseTokenizer(luceneMatchVersion, factory);
  }

  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@ -17,22 +17,38 @@ package org.apache.lucene.analysis.core;
 * limitations under the License.
 */

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.Version;

 /** An {@link Analyzer} that filters {@link LetterTokenizer} 
 *  with {@link LowerCaseFilter} 
+ * <p>
+ * <a name="version"></a>You must specify the required {@link Version} compatibility
+ * when creating {@link CharTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link LowerCaseTokenizer} uses an int based API to normalize and
+ * detect token codepoints. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * <p>
 **/
 public final class SimpleAnalyzer extends Analyzer {

+  private final Version matchVersion;
+  
  /**
   * Creates a new {@link SimpleAnalyzer}
+   * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
   */
-  public SimpleAnalyzer() {
+  public SimpleAnalyzer(Version matchVersion) {
+    this.matchVersion = matchVersion;
  }
  
  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
-    return new TokenStreamComponents(new LowerCaseTokenizer());
+    return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion));
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@ -27,10 +27,20 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;

-/** 
- * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+ *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopAnalyzer:
+ * <ul>
+ *    <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
+ *   <li> As of 2.9, position increments are preserved
+ * </ul>
 */
+
 public final class StopAnalyzer extends StopwordAnalyzerBase {
  
  /** An unmodifiable set containing some common English words that are not usually useful
@ -45,35 +55,40 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
      "that", "the", "their", "then", "there", "these",
      "they", "this", "to", "was", "will", "with"
    );
-    final CharArraySet stopSet = new CharArraySet(stopWords, false);
+    final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, 
+        stopWords, false);
    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); 
  }
  
  /** Builds an analyzer which removes words in
   *  {@link #ENGLISH_STOP_WORDS_SET}.
+   * @param matchVersion See <a href="#version">above</a>
   */
-  public StopAnalyzer() {
-    this(ENGLISH_STOP_WORDS_SET);
+  public StopAnalyzer(Version matchVersion) {
+    this(matchVersion, ENGLISH_STOP_WORDS_SET);
  }

  /** Builds an analyzer with the stop words from the given set.
+   * @param matchVersion See <a href="#version">above</a>
   * @param stopWords Set of stop words */
-  public StopAnalyzer(CharArraySet stopWords) {
-    super(stopWords);
+  public StopAnalyzer(Version matchVersion, CharArraySet stopWords) {
+    super(matchVersion, stopWords);
  }

  /** Builds an analyzer with the stop words from the given file.
-   * @see WordlistLoader#getWordSet(Reader)
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion See <a href="#version">above</a>
   * @param stopwordsFile File to load stop words from */
-  public StopAnalyzer(File stopwordsFile) throws IOException {
-    this(loadStopwordSet(stopwordsFile));
+  public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
+    this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion));
  }

  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader)
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion See <a href="#version">above</a>
   * @param stopwords Reader to load stop words from */
-  public StopAnalyzer(Reader stopwords) throws IOException {
-    this(loadStopwordSet(stopwords));
+  public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, loadStopwordSet(stopwords, matchVersion));
  }

  /**
@ -87,8 +102,9 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new LowerCaseTokenizer();
-    return new TokenStreamComponents(source, new StopFilter(source, stopwords));
+    final Tokenizer source = new LowerCaseTokenizer(matchVersion);
+    return new TokenStreamComponents(source, new StopFilter(matchVersion,
+          source, stopwords));
  }
 }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
@ -24,9 +24,19 @@ import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;

 /**
 * Removes stop words from a token stream.
+ * 
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopFilter:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords and position
+ *         increments are preserved
+ * </ul>
 */
 public final class StopFilter extends FilteringTokenFilter {

@ -37,14 +47,17 @@ public final class StopFilter extends FilteringTokenFilter {
   * Constructs a filter which removes words from the input TokenStream that are
   * named in the Set.
   * 
+   * @param matchVersion
+   *          Lucene version to enable correct Unicode 4.0 behavior in the stop
+   *          set if Version &gt; 3.0.  See <a href="#version">above</a> for details.
   * @param in
   *          Input stream
   * @param stopWords
   *          A {@link CharArraySet} representing the stopwords.
-   * @see #makeStopSet(java.lang.String...)
+   * @see #makeStopSet(Version, java.lang.String...)
   */
-  public StopFilter(TokenStream in, CharArraySet stopWords) {
-    super(in);
+  public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) {
+    super(matchVersion, in);
    this.stopWords = stopWords;
  }

@ -54,11 +67,12 @@ public final class StopFilter extends FilteringTokenFilter {
   * This permits this stopWords construction to be cached once when
   * an Analyzer is constructed.
   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
   * @param stopWords An array of stopwords
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
   */
-  public static CharArraySet makeStopSet(String... stopWords) {
-    return makeStopSet(stopWords, false);
+  public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) {
+    return makeStopSet(matchVersion, stopWords, false);
  }
  
  /**
@ -67,35 +81,38 @@ public final class StopFilter extends FilteringTokenFilter {
   * This permits this stopWords construction to be cached once when
   * an Analyzer is constructed.
   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
   * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
   * @return A Set ({@link CharArraySet}) containing the words
-   * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+   * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
   */
-  public static CharArraySet makeStopSet(List<?> stopWords) {
-    return makeStopSet(stopWords, false);
+  public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords) {
+    return makeStopSet(matchVersion, stopWords, false);
  }
    
  /**
   * Creates a stopword set from the given stopword array.
   * 
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
   * @param stopWords An array of stopwords
   * @param ignoreCase If true, all words are lower cased first.  
   * @return a Set containing the words
   */    
-  public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
-    CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
+  public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
    stopSet.addAll(Arrays.asList(stopWords));
    return stopSet;
  }
  
  /**
   * Creates a stopword set from the given stopword list.
+   * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version &gt; 3.0
   * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
   * @param ignoreCase if true, all words are lower cased first
   * @return A Set ({@link CharArraySet}) containing the words
   */
-  public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase){
-    CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+  public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
+    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
    stopSet.addAll(stopWords);
    return stopSet;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@ -81,6 +81,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
  /** Creates a new StopFilterFactory */
  public StopFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    stopWordFiles = get(args, "words");
    format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
    ignoreCase = getBoolean(args, "ignoreCase", false);
@ -103,7 +104,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
      if (null != format) {
        throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
      }
-      stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+      stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    }
  }

@ -117,7 +118,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa

  @Override
  public TokenStream create(TokenStream input) {
-    StopFilter stopFilter = new StopFilter(input,stopWords);
+    StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
    return stopFilter;
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.util.Version;

 /**
 * Removes tokens whose types appear in a set of blocked types from a token stream.
@ -34,13 +35,14 @@ public final class TypeTokenFilter extends FilteringTokenFilter {

  /**
   * Create a new {@link TypeTokenFilter}.
+   * @param version      the Lucene match version
   * @param input        the {@link TokenStream} to consume
   * @param stopTypes    the types to filter
   * @param useWhiteList if true, then tokens whose type is in stopTypes will
   *                     be kept, otherwise they will be filtered out
   */
-  public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
-    super(input);
+  public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
+    super(version, input);
    this.stopTypes = stopTypes;
    this.useWhiteList = useWhiteList;
  }
@ -48,9 +50,10 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
  /**
   * Create a new {@link TypeTokenFilter} that filters tokens out
   * (useWhiteList=false).
+   * @see #TypeTokenFilter(Version, TokenStream, Set, boolean)
   */
-  public TypeTokenFilter(TokenStream input, Set<String> stopTypes) {
-    this(input, stopTypes, false);
+  public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes) {
+    this(version, input, stopTypes, false);
  }

  /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
@ -72,7 +72,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour

  @Override
  public TokenStream create(TokenStream input) {
-    final TokenStream filter = new TypeTokenFilter(input, stopTypes, useWhitelist);
+    final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist);
    return filter;
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
@ -23,9 +23,13 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;

 /**
 * Normalizes token text to UPPER CASE.
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating UpperCaseFilter
 * 
 * <p><b>NOTE:</b> In Unicode, this transformation may lose information when the
 * upper case character represents more than one lower case character. Use this filter
@ -33,16 +37,18 @@ import org.apache.lucene.analysis.util.CharacterUtils;
 * general search matching
 */
 public final class UpperCaseFilter extends TokenFilter {
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
+  private final CharacterUtils charUtils;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  
  /**
   * Create a new UpperCaseFilter, that normalizes token text to upper case.
   * 
+   * @param matchVersion See <a href="#version">above</a>
   * @param in TokenStream to filter
   */
-  public UpperCaseFilter(TokenStream in) {
+  public UpperCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
+    charUtils = CharacterUtils.getInstance(matchVersion);
  }
  
  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
@ -45,6 +45,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT
  /** Creates a new UpperCaseFilterFactory */
  public UpperCaseFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -52,7 +53,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT

  @Override
  public UpperCaseFilter create(TokenStream input) {
-    return new UpperCaseFilter(input);
+    return new UpperCaseFilter(luceneMatchVersion,input);
  }

  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
@ -17,21 +17,38 @@ package org.apache.lucene.analysis.core;
 * limitations under the License.
 */

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.Version;

 /**
 * An Analyzer that uses {@link WhitespaceTokenizer}.
+ * <p>
+ * <a name="version"></a>You must specify the required {@link Version} compatibility
+ * when creating {@link CharTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link WhitespaceTokenizer} uses an int based API to normalize and
+ * detect token codepoints. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
+ * <p>
 **/
 public final class WhitespaceAnalyzer extends Analyzer {
  
+  private final Version matchVersion;
+  
  /**
   * Creates a new {@link WhitespaceAnalyzer}
+   * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
   */
-  public WhitespaceAnalyzer() {
+  public WhitespaceAnalyzer(Version matchVersion) {
+    this.matchVersion = matchVersion;
  }
  
  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
-    return new TokenStreamComponents(new WhitespaceTokenizer());
+    return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion));
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
@ -17,31 +17,50 @@ package org.apache.lucene.analysis.core;
 * limitations under the License.
 */

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /**
 * A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
- * Adjacent sequences of non-Whitespace characters form tokens.
+ * Adjacent sequences of non-Whitespace characters form tokens.
+ * <a name="version"></a>
+ * <p>
+ * You must specify the required {@link Version} compatibility when creating
+ * {@link WhitespaceTokenizer}:
+ * <ul>
+ * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
+ * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
+ * {@link CharTokenizer#normalize(int)} for details.</li>
+ * </ul>
 */
 public final class WhitespaceTokenizer extends CharTokenizer {
  
  /**
-   * Construct a new WhitespaceTokenizer.
+   * Construct a new WhitespaceTokenizer.
+   *
+   * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
   */
-  public WhitespaceTokenizer() {
+  public WhitespaceTokenizer(Version matchVersion) {
+    super(matchVersion);
  }

  /**
   * Construct a new WhitespaceTokenizer using a given
   * {@link org.apache.lucene.util.AttributeFactory}.
   *
+   * @param matchVersion
+   *          Lucene version to match. See
+   *          <a href="#version">above</a>.
   * @param factory
   *          the attribute factory to use for this {@link Tokenizer}
   */
-  public WhitespaceTokenizer(AttributeFactory factory) {
-    super(factory);
+  public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory) {
+    super(matchVersion, factory);
  }
  
  /** Collects only characters which do not satisfy
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;

+import java.io.Reader;
 import java.util.Map;

 /**
@ -36,6 +37,7 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
  /** Creates a new WhitespaceTokenizerFactory */
  public WhitespaceTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -43,6 +45,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {

  @Override
  public WhitespaceTokenizer create(AttributeFactory factory) {
-    return new WhitespaceTokenizer(factory);
+    return new WhitespaceTokenizer(luceneMatchVersion, factory);
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 import java.io.*;
 import java.nio.charset.StandardCharsets;
@ -60,7 +61,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -74,30 +75,34 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {

  /**
   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
+   *
+   * @param matchVersion Lucene version to match
   */
-  public CzechAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_SET);
+  public CzechAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
+   * @param matchVersion Lucene version to match
   * @param stopwords a stopword set
   */
-  public CzechAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words and a set of work to be
   * excluded from the {@link CzechStemFilter}.
   * 
+   * @param matchVersion Lucene version to match
   * @param stopwords a stopword set
   * @param stemExclusionTable a stemming exclusion set
   */
-  public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) {
-    super(stopwords);
-    this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
+  public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) {
+    super(matchVersion, stopwords);
+    this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  }

  /**
@ -110,16 +115,16 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
   *         a stem exclusion set is provided via
-   *         {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a
+   *         {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a
   *         {@link SetKeywordMarkerFilter} is added before
   *         {@link CzechStemFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter( matchVersion, result, stopwords);
    if(!this.stemExclusionTable.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionTable);
    result = new CzechStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.DanishStemmer;

 /**
@ -63,7 +64,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -75,17 +76,18 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public DanishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public DanishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public DanishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -93,12 +95,14 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public DanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -115,10 +119,10 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new DanishStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@ -69,7 +69,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -91,31 +91,35 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * Builds an analyzer with the default stop words:
   * {@link #getDefaultStopSet()}.
   */
-  public GermanAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_SET);
+  public GermanAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words 
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public GermanAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   * @param stemExclusionSet
   *          a stemming exclusion set
   */
-  public GermanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
  }

  /**
@ -131,10 +135,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter( matchVersion, result, stopwords);
    result = new SetKeywordMarkerFilter(result, exclusionSet);
    result = new GermanNormalizationFilter(result);
    result = new GermanLightStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@ -69,9 +69,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
  
  /**
   * Builds an analyzer with the default stop words.
+   * @param matchVersion Lucene compatibility version
   */
-  public GreekAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_SET);
+  public GreekAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
  }
  
  /**
@ -80,10 +81,11 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
   * <b>NOTE:</b> The stopwords set should be pre-processed with the logic of 
   * {@link GreekLowerCaseFilter} for best results.
   *  
+   * @param matchVersion Lucene compatibility version
   * @param stopwords a stopword set
   */
-  public GreekAnalyzer(CharArraySet stopwords) {
-    super(stopwords);
+  public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    super(matchVersion, stopwords);
  }
  
  /**
@ -98,10 +100,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new GreekLowerCaseFilter(source);
-    result = new StandardFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
+    result = new StandardFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    result = new GreekStemFilter(result);
    return new TokenStreamComponents(source, result);
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
@ -22,22 +22,32 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;

 /**
 * Normalizes token text to lower case, removes some Greek diacritics,
 * and standardizes final sigma to sigma. 
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating GreekLowerCaseFilter:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
 */
 public final class GreekLowerCaseFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
+  private final CharacterUtils charUtils;

  /**
   * Create a GreekLowerCaseFilter that normalizes Greek token text.
   * 
+   * @param matchVersion Lucene compatibility version;
+   *   see <a href="#version">above</a>
   * @param in TokenStream to filter
   */
-  public GreekLowerCaseFilter(TokenStream in) {
+  public GreekLowerCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
  }
  
  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
@ -40,6 +40,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M
  /** Creates a new GreekLowerCaseFilterFactory */
  public GreekLowerCaseFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -47,7 +48,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M

  @Override
  public GreekLowerCaseFilter create(TokenStream in) {
-    return new GreekLowerCaseFilter(in);
+    return new GreekLowerCaseFilter(luceneMatchVersion, in);
  }

  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
@ -1,6 +1,7 @@
 package org.apache.lucene.analysis.el;

 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;

 import java.util.Arrays;

@ -204,7 +205,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc4 = new CharArraySet(
+  private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"),
      false);
  
@ -230,7 +231,7 @@ public class GreekStemmer {
    return len;
  }

-  private static final CharArraySet exc6 = new CharArraySet(
+  private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ",
          "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ",
          "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ",
@ -255,7 +256,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc7 = new CharArraySet(
+  private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ",
          "πεθ", "πικρ", "ποτ", "σιχ", "χ"), 
      false);
@ -282,11 +283,11 @@ public class GreekStemmer {
    return len;
  }

-  private static final CharArraySet exc8a = new CharArraySet(
+  private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("τρ", "τσ"),
      false);

-  private static final CharArraySet exc8b = new CharArraySet(
+  private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ",
          "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ",
          "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ",
@ -345,7 +346,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc9 = new CharArraySet(
+  private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ",
          "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ",
          "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), 
@ -433,11 +434,11 @@ public class GreekStemmer {
    return len;
  }

-  private static final CharArraySet exc12a = new CharArraySet(
+  private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"),
      false);

-  private static final CharArraySet exc12b = new CharArraySet(
+  private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"),
      false);
  
@ -457,7 +458,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc13 = new CharArraySet(
+  private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"),
      false);
  
@ -491,7 +492,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc14 = new CharArraySet(
+  private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ",
          "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ",
          "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε",
@ -529,7 +530,7 @@ public class GreekStemmer {
   return len;
  }
  
-  private static final CharArraySet exc15a = new CharArraySet(
+  private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ",
          "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ",
          "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ",
@ -538,7 +539,7 @@ public class GreekStemmer {
          "ουλαμ", "ουρ", "π", "τρ", "μ"), 
      false);
  
-  private static final CharArraySet exc15b = new CharArraySet(
+  private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("ψοφ", "ναυλοχ"),
      false);
  
@ -575,7 +576,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc16 = new CharArraySet(
+  private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"),
      false);
  
@ -595,7 +596,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc17 = new CharArraySet(
+  private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"),
      false);
  
@ -609,7 +610,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc18 = new CharArraySet(
+  private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"),
      false);
  
@ -633,7 +634,7 @@ public class GreekStemmer {
    return len;
  }
  
-  private static final CharArraySet exc19 = new CharArraySet(
+  private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT,
      Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"),
      false);
  
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@ -30,6 +30,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for English.
@ -56,17 +57,18 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
   */
-  public EnglishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public EnglishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public EnglishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -74,12 +76,14 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -97,11 +101,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new EnglishPossessiveFilter(result);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new EnglishPossessiveFilter(matchVersion, result);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new PorterStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
@ -22,6 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;

 /**
 * TokenFilter that removes possessives (trailing 's) from words.
@ -29,7 +30,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 public final class EnglishPossessiveFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

-  public EnglishPossessiveFilter(TokenStream input) {
+  // NOTE: the version parameter is no longer used by this filter
+  public EnglishPossessiveFilter(Version version, TokenStream input) {
    super(input);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
@ -39,6 +39,7 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
  /** Creates a new EnglishPossessiveFilterFactory */
  public EnglishPossessiveFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -46,6 +47,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
  
  @Override
  public TokenStream create(TokenStream input) {
-    return new EnglishPossessiveFilter(input);
+    return new EnglishPossessiveFilter(luceneMatchVersion, input);
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
@ -64,6 +64,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 * <p>Copyright: Copyright 2008, Luicid Imagination, Inc. </p>
 * <p>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu) </p>
 */
+import org.apache.lucene.util.Version;

 /**
 * This class implements the Kstem algorithm
@ -279,7 +280,7 @@ public class KStemmer {
    DictEntry defaultEntry;
    DictEntry entry;

-    CharArrayMap<DictEntry> d = new CharArrayMap<>(1000, false);
+    CharArrayMap<DictEntry> d = new CharArrayMap<>(Version.LUCENE_CURRENT, 1000, false);
    for (int i = 0; i < exceptionWords.length; i++) {
      if (!d.containsKey(exceptionWords[i])) {
        entry = new DictEntry(exceptionWords[i], true);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Spanish.
@ -62,7 +63,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -74,17 +75,18 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public SpanishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public SpanishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public SpanishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -92,12 +94,14 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -114,10 +118,10 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SpanishLightStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@ -31,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.BasqueStemmer;

 /**
@ -72,17 +73,18 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public BasqueAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public BasqueAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public BasqueAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -90,12 +92,14 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -112,10 +116,10 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new BasqueStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Persian.
@ -86,18 +87,20 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
   * Builds an analyzer with the default stop words:
   * {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public PersianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public PersianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words 
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public PersianAnalyzer(CharArraySet stopwords){
-    super(stopwords);
+  public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){
+    super(matchVersion, stopwords);
  }

  /**
@ -112,8 +115,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new LowerCaseFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
@ -121,7 +124,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
     * the order here is important: the stopword list is normalized with the
     * above!
     */
-    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
  }
  
  /** 
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.FinnishStemmer;

 /**
@ -63,7 +64,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -75,17 +76,18 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public FinnishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public FinnishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public FinnishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -93,12 +95,14 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -115,10 +119,10 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new FinnishStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@ -59,7 +59,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
  
  /** Default set of articles for ElisionFilter */
  public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
-      new CharArraySet(Arrays.asList(
+      new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
          "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true));

  /**
@ -80,7 +80,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-                DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+                DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -92,33 +92,37 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
   */
-  public FrenchAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public FrenchAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public FrenchAnalyzer(CharArraySet stopwords){
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   * @param stemExclutionSet
   *          a stemming exclusion set
   */
-  public FrenchAnalyzer(CharArraySet stopwords,
+  public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords,
      CharArraySet stemExclutionSet) {
-    super(stopwords);
+    super(matchVersion, stopwords);
    this.excltable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(stemExclutionSet));
+        .copy(matchVersion, stemExclutionSet));
  }

  /**
@ -135,11 +139,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
    result = new ElisionFilter(result, DEFAULT_ARTICLES);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!excltable.isEmpty())
      result = new SetKeywordMarkerFilter(result, excltable);
    result = new FrenchLightStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
@ -32,6 +32,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.IrishStemmer;

 /**
@ -44,7 +45,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  
  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
-      new CharArraySet(
+      new CharArraySet(Version.LUCENE_CURRENT, 
          Arrays.asList(
              "d", "m", "b"
          ), true));
@ -55,7 +56,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
   * with phrase queries versus tAthair (which would not have a gap).
   */
  private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(
-      new CharArraySet(
+      new CharArraySet(Version.LUCENE_CURRENT,
          Arrays.asList(
              "h", "n", "t"
          ), true));
@@ -90,17 +91,18 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public IrishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public IrishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public IrishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -108,12 +110,14 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -130,12 +134,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new StopFilter(result, HYPHENATIONS);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new StopFilter(matchVersion, result, HYPHENATIONS);
    result = new ElisionFilter(result, DEFAULT_ARTICLES);
    result = new IrishLowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new IrishStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Galician.
@@ -61,7 +62,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@@ -73,17 +74,18 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public GalicianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public GalicianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public GalicianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -91,12 +93,14 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -113,10 +117,10 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new GalicianStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
+import org.apache.lucene.util.Version;

 /**
 * Analyzer for Hindi.
@@ -74,29 +75,32 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param version lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a stemming exclusion set
   */
-  public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(version, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(
+        CharArraySet.copy(matchVersion, stemExclusionSet));
  }
  
  /**
   * Builds an analyzer with the given stop words 
   * 
+   * @param version lucene compatibility version
   * @param stopwords a stopword set
   */
-  public HindiAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public HindiAnalyzer(Version version, CharArraySet stopwords) {
+    this(version, stopwords, CharArraySet.EMPTY_SET);
  }
  
  /**
   * Builds an analyzer with the default stop words:
   * {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public HindiAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public HindiAnalyzer(Version version) {
+    this(version, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
@@ -113,13 +117,13 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new LowerCaseFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
    if (!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new IndicNormalizationFilter(result);
    result = new HindiNormalizationFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new StopFilter(matchVersion, result, stopwords);
    result = new HindiStemFilter(result);
    return new TokenStreamComponents(source, result);
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.HungarianStemmer;

 /**
@@ -63,7 +64,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@@ -75,17 +76,18 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public HungarianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public HungarianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public HungarianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -93,12 +95,14 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -115,10 +119,10 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new HungarianStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -215,7 +215,7 @@ final class Stemmer {
    if (stems.size() < 2) {
      return stems;
    }
-    CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
+    CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
    List<CharsRef> deduped = new ArrayList<>();
    for (CharsRef s : stems) {
      if (!terms.contains(s)) {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.ArmenianStemmer;

 /**
@@ -72,17 +73,18 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public ArmenianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public ArmenianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public ArmenianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -90,12 +92,14 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -112,10 +116,10 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new ArmenianStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;

 /**
 * Analyzer for Indonesian (Bahasa)
@@ -68,18 +69,20 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public IndonesianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public IndonesianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   */
-  public IndonesianAnalyzer(CharArraySet stopwords){
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -87,14 +90,17 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * {@link IndonesianStemFilter}.
   * 
+   * @param matchVersion
+   *          lucene compatibility version
   * @param stopwords
   *          a stopword set
   * @param stemExclusionSet
   *          a set of terms not to be stemmed
   */
-  public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -110,10 +116,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty()) {
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Italian.
@@ -47,7 +48,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
  public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
  
  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
-      new CharArraySet(
+      new CharArraySet(Version.LUCENE_CURRENT, 
          Arrays.asList(
          "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", 
          "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
@@ -71,7 +72,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@@ -83,17 +84,18 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public ItalianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public ItalianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public ItalianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -101,12 +103,14 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -123,11 +127,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
    result = new ElisionFilter(result, DEFAULT_ARTICLES);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new ItalianLightStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Latvian.
@@ -61,7 +62,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@@ -73,17 +74,18 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public LatvianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public LatvianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public LatvianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@@ -91,12 +93,14 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@@ -113,10 +117,10 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new LatvianStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
@@ -82,7 +82,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
    boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
    Set<String> k = getSet(args, KEEP);
    if (k != null) {
-      keep = new CharArraySet(10, ignoreCase);
+      keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
      keep.addAll(k);
    }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;

 /**
 * Removes words that are too long or too short from the stream.
@@ -38,12 +39,13 @@ public final class CodepointCountFilter extends FilteringTokenFilter {
   * Create a new {@link CodepointCountFilter}. This will filter out tokens whose
   * {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)}
   * &lt; min) or too long ({@link Character#codePointCount(char[], int, int)} &gt; max).
+   * @param version the Lucene match version
   * @param in      the {@link TokenStream} to consume
   * @param min     the minimum length
   * @param max     the maximum length
   */
-  public CodepointCountFilter(TokenStream in, int min, int max) {
-    super(in);
+  public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
+    super(version, in);
    if (min < 0) {
      throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
    }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
@@ -50,6 +50,6 @@ public class CodepointCountFilterFactory extends TokenFilterFactory {
  
  @Override
  public CodepointCountFilter create(TokenStream input) {
-    return new CodepointCountFilter(input, min, max);
+    return new CodepointCountFilter(luceneMatchVersion, input, min, max);
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;

 /**
 * A TokenFilter that only keeps tokens with text contained in the
@@ -36,11 +37,12 @@ public final class KeepWordFilter extends FilteringTokenFilter {
   * Create a new {@link KeepWordFilter}.
   * <p><b>NOTE</b>: The words set passed to this constructor will be directly
   * used by this filter and should not be modified.
+   * @param version the Lucene match version
   * @param in      the {@link TokenStream} to consume
   * @param words   the words to keep
   */
-  public KeepWordFilter(TokenStream in, CharArraySet words) {
-    super(in);
+  public KeepWordFilter(Version version, TokenStream in, CharArraySet words) {
+    super(version, in);
    this.words = words;
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
@@ -44,6 +44,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
  /** Creates a new KeepWordFilterFactory */
  public KeepWordFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    wordFiles = get(args, "words");
    ignoreCase = getBoolean(args, "ignoreCase", false);
    if (!args.isEmpty()) {
@@ -72,7 +73,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
    if (words == null) {
      return input;
    } else {
-      final TokenStream filter = new KeepWordFilter(input, words);
+      final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words);
      return filter;
    }
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;

 /**
 * Removes words that are too long or too short from the stream.
@@ -38,12 +39,13 @@ public final class LengthFilter extends FilteringTokenFilter {
   * Create a new {@link LengthFilter}. This will filter out tokens whose
   * {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()}
   * &lt; min) or too long ({@link CharTermAttribute#length()} &gt; max).
+   * @param version the Lucene match version
   * @param in      the {@link TokenStream} to consume
   * @param min     the minimum length
   * @param max     the maximum length
   */
-  public LengthFilter(TokenStream in, int min, int max) {
-    super(in);
+  public LengthFilter(Version version, TokenStream in, int min, int max) {
+    super(version, in);
    if (min < 0) {
      throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
    }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
@ -50,7 +50,7 @@ public class LengthFilterFactory extends TokenFilterFactory {
  
  @Override
  public LengthFilter create(TokenStream input) {
-    final LengthFilter filter = new LengthFilter(input,min,max);
+    final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max);
    return filter;
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;

 import java.io.IOException;

@ -33,7 +34,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAttribute =  addAttribute(PositionIncrementAttribute.class);
  
-  private final CharArraySet previous = new CharArraySet(8, false);
+  // use a fixed version, as we don't care about case sensitivity.
+  private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false);

  /**
   * Creates a new RemoveDuplicatesTokenFilter
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
@ -20,11 +20,15 @@ package org.apache.lucene.analysis.miscellaneous;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.Version;

 import java.io.IOException;

 /**
 * Trims leading and trailing whitespace from Tokens in the stream.
+ * <p>As of Lucene 4.4, this filter does not support updateOffsets=true anymore
+ * as it can lead to broken token streams.
 */
 public final class TrimFilter extends TokenFilter {

@ -32,9 +36,10 @@ public final class TrimFilter extends TokenFilter {

  /**
   * Create a new {@link TrimFilter}.
+   * @param version       the Lucene match version
   * @param in            the stream to consume
   */
-  public TrimFilter(TokenStream in) {
+  public TrimFilter(Version version, TokenStream in) {
    super(in);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
@ -47,7 +47,7 @@ public class TrimFilterFactory extends TokenFilterFactory {
  
  @Override
  public TrimFilter create(TokenStream input) {
-    final TrimFilter filter = new TrimFilter(input);
+    final TrimFilter filter = new TrimFilter(luceneMatchVersion, input);
    return filter;
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@ -80,7 +80,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
    }

    this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
-        ? CharacterUtils.getInstance()
+        ? CharacterUtils.getInstance(version)
        : CharacterUtils.getJava4Instance();
    this.minGram = minGram;
    this.maxGram = maxGram;
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@ -81,10 +81,10 @@ public final class NGramTokenFilter extends TokenFilter {
   * @param maxGram the largest n-gram to generate
   */
  public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
-    super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
+    super(new CodepointCountFilter(version, input, minGram, Integer.MAX_VALUE));
    this.version = version;
    this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
-        ? CharacterUtils.getInstance()
+        ? CharacterUtils.getInstance(version)
        : CharacterUtils.getJava4Instance();
    if (minGram < 1) {
      throw new IllegalArgumentException("minGram must be greater than zero");
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@ -121,7 +121,7 @@ public class NGramTokenizer extends Tokenizer {
      throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer");
    }
    charUtils = version.onOrAfter(Version.LUCENE_4_4)
-        ? CharacterUtils.getInstance()
+        ? CharacterUtils.getInstance(version)
        : CharacterUtils.getJava4Instance();
    if (minGram < 1) {
      throw new IllegalArgumentException("minGram must be greater than zero");
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@ -28,11 +28,13 @@ import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.io.Reader;
@ -48,8 +50,10 @@ import java.nio.charset.StandardCharsets;
 * A default set of stopwords is used unless an alternative list is specified, but the
 * exclusion list is empty by default.
 * </p>
+ * 
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
 */
-// TODO: extend StopwordAnalyzerBase
 public final class DutchAnalyzer extends Analyzer {
  
  /** File containing default Dutch stopwords. */
@ -69,14 +73,14 @@ public final class DutchAnalyzer extends Analyzer {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
      
-      DEFAULT_STEM_DICT = new CharArrayMap<>(4, false);
+      DEFAULT_STEM_DICT = new CharArrayMap<>(Version.LUCENE_CURRENT, 4, false);
      DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
      DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
      DEFAULT_STEM_DICT.put("ei", "eier");
@ -96,27 +100,29 @@ public final class DutchAnalyzer extends Analyzer {
  private CharArraySet excltable = CharArraySet.EMPTY_SET;

  private final StemmerOverrideMap stemdict;
+  private final Version matchVersion;

  /**
   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}) 
   * and a few default entries for the stem exclusion table.
   * 
   */
-  public DutchAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
+  public DutchAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
  }
  
-  public DutchAnalyzer(CharArraySet stopwords){
-    this(stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
+  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
  }
  
-  public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable){
-    this(stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
+  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
+    this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
  }
  
-  public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
-    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
-    this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
+  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
+    this.matchVersion = matchVersion;
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
    if (stemOverrideDict.isEmpty()) {
      this.stemdict = null;
    } else {
@ -148,10 +154,10 @@ public final class DutchAnalyzer extends Analyzer {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stoptable);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stoptable);
    if (!excltable.isEmpty())
      result = new SetKeywordMarkerFilter(result, excltable);
    if (stemdict != null)
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.NorwegianStemmer;

 /**
@ -63,7 +64,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -75,17 +76,18 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public NorwegianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public NorwegianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public NorwegianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -93,12 +95,14 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -115,10 +119,10 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new NorwegianStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;

 /**
 * {@link Analyzer} for Portuguese.
@ -62,7 +63,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -74,17 +75,18 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public PortugueseAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public PortugueseAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public PortugueseAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -92,12 +94,14 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public PortugueseAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -114,10 +118,10 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new PortugueseLightStemFilter(result);
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@ -31,6 +31,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;

 import static org.apache.lucene.analysis.util.StemmerUtil.*;

@ -134,7 +135,8 @@ public abstract class RSLPStemmerBase {
        if (!exceptions[i].endsWith(suffix))
          throw new RuntimeException("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
      }
-      this.exceptions = new CharArraySet(Arrays.asList(exceptions), false);
+      this.exceptions = new CharArraySet(Version.LUCENE_CURRENT,
+           Arrays.asList(exceptions), false);
    }

    @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@ -31,6 +31,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.Version;

 /**
 * An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection
@ -49,20 +50,23 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
  //The default maximum percentage (40%) of index documents which
  //can contain a term, after which the term is considered to be a stop word.
  public static final float defaultMaxDocFreqPercent = 0.4f;
+  private final Version matchVersion;

  /**
   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
   * indexed fields from terms with a document frequency percentage greater than
   * {@link #defaultMaxDocFreqPercent}
   *
+   * @param matchVersion Version to be used in {@link StopFilter}
   * @param delegate Analyzer whose TokenStream will be filtered
   * @param indexReader IndexReader to identify the stopwords from
   * @throws IOException Can be thrown while reading from the IndexReader
   */
  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
      Analyzer delegate,
      IndexReader indexReader) throws IOException {
-    this(delegate, indexReader, defaultMaxDocFreqPercent);
+    this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent);
  }

  /**
@ -70,16 +74,18 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * indexed fields from terms with a document frequency greater than the given
   * maxDocFreq
   *
+   * @param matchVersion Version to be used in {@link StopFilter}
   * @param delegate Analyzer whose TokenStream will be filtered
   * @param indexReader IndexReader to identify the stopwords from
   * @param maxDocFreq Document frequency terms should be above in order to be stopwords
   * @throws IOException Can be thrown while reading from the IndexReader
   */
  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
      Analyzer delegate,
      IndexReader indexReader,
      int maxDocFreq) throws IOException {
-    this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq);
+    this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq);
  }

  /**
@ -87,6 +93,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * indexed fields from terms with a document frequency percentage greater than
   * the given maxPercentDocs
   *
+   * @param matchVersion Version to be used in {@link StopFilter}
   * @param delegate Analyzer whose TokenStream will be filtered
   * @param indexReader IndexReader to identify the stopwords from
   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
@ -94,10 +101,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * @throws IOException Can be thrown while reading from the IndexReader
   */
  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
      Analyzer delegate,
      IndexReader indexReader,
      float maxPercentDocs) throws IOException {
-    this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs);
+    this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs);
  }

  /**
@ -105,6 +113,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * given selection of fields from terms with a document frequency percentage
   * greater than the given maxPercentDocs
   *
+   * @param matchVersion Version to be used in {@link StopFilter}
   * @param delegate Analyzer whose TokenStream will be filtered
   * @param indexReader IndexReader to identify the stopwords from
   * @param fields Selection of fields to calculate stopwords for
@ -113,11 +122,12 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * @throws IOException Can be thrown while reading from the IndexReader
   */
  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
      Analyzer delegate,
      IndexReader indexReader,
      Collection<String> fields,
      float maxPercentDocs) throws IOException {
-    this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
+    this(matchVersion, delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
  }

  /**
@ -125,6 +135,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * given selection of fields from terms with a document frequency greater than
   * the given maxDocFreq
   *
+   * @param matchVersion Version to be used in {@link StopFilter}
   * @param delegate Analyzer whose TokenStream will be filtered
   * @param indexReader IndexReader to identify the stopwords from
   * @param fields Selection of fields to calculate stopwords for
@ -132,11 +143,13 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
   * @throws IOException Can be thrown while reading from the IndexReader
   */
  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
      Analyzer delegate,
      IndexReader indexReader,
      Collection<String> fields,
      int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
+    this.matchVersion = matchVersion;
    this.delegate = delegate;
    
    for (String field : fields) {
@ -168,8 +181,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
    if (stopWords == null) {
      return components;
    }
-    StopFilter stopFilter = new StopFilter(components.getTokenStream(), 
-        new CharArraySet(stopWords, false));
+    StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), 
+        new CharArraySet(matchVersion, stopWords, false));
    return new TokenStreamComponents(components.getTokenizer(), stopFilter);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.reverse;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;

 import java.io.IOException;

@ -35,6 +36,7 @@ public final class ReverseStringFilter extends TokenFilter {

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final char marker;
+  private final Version matchVersion;
  private static final char NOMARKER = '\uFFFF';
  
  /**
@ -64,10 +66,11 @@ public final class ReverseStringFilter extends TokenFilter {
   * The reversed tokens will not be marked. 
   * </p>
   * 
+   * @param matchVersion Lucene compatibility version
   * @param in {@link TokenStream} to filter
   */
-  public ReverseStringFilter(TokenStream in) {
-    this(in, NOMARKER);
+  public ReverseStringFilter(Version matchVersion, TokenStream in) {
+    this(matchVersion, in, NOMARKER);
  }

  /**
@ -78,11 +81,13 @@ public final class ReverseStringFilter extends TokenFilter {
   * character.
   * </p>
   * 
+   * @param matchVersion compatibility version
   * @param in {@link TokenStream} to filter
   * @param marker A character used to mark reversed tokens
   */
-  public ReverseStringFilter(TokenStream in, char marker) {
+  public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
    super(in);
+    this.matchVersion = matchVersion;
    this.marker = marker;
  }

@ -95,7 +100,7 @@ public final class ReverseStringFilter extends TokenFilter {
        termAtt.resizeBuffer(len);
        termAtt.buffer()[len - 1] = marker;
      }
-      reverse( termAtt.buffer(), 0, len );
+      reverse( matchVersion, termAtt.buffer(), 0, len );
      termAtt.setLength(len);
      return true;
    } else {
@ -106,43 +111,48 @@ public final class ReverseStringFilter extends TokenFilter {
  /**
   * Reverses the given input string
   * 
+   * @param matchVersion compatibility version
   * @param input the string to reverse
   * @return the given input string in reversed order
   */
-  public static String reverse(final String input ){
+  public static String reverse( Version matchVersion, final String input ){
    final char[] charInput = input.toCharArray();
-    reverse( charInput, 0, charInput.length );
+    reverse( matchVersion, charInput, 0, charInput.length );
    return new String( charInput );
  }
  
  /**
   * Reverses the given input buffer in-place
+   * @param matchVersion compatibility version
   * @param buffer the input char array to reverse
   */
-  public static void reverse(final char[] buffer) {
-    reverse(buffer, 0, buffer.length);
+  public static void reverse(Version matchVersion, final char[] buffer) {
+    reverse(matchVersion, buffer, 0, buffer.length);
  }
  
  /**
   * Partially reverses the given input buffer in-place from offset 0
   * up to the given length.
+   * @param matchVersion compatibility version
   * @param buffer the input char array to reverse
   * @param len the length in the buffer up to where the
   *        buffer should be reversed
   */
-  public static void reverse(final char[] buffer, final int len) {
-    reverse( buffer, 0, len );
+  public static void reverse(Version matchVersion, final char[] buffer,
+      final int len) {
+    reverse( matchVersion, buffer, 0, len );
  }
  
  /**
   * Partially reverses the given input buffer in-place from the given offset
   * up to the given length.
+   * @param matchVersion compatibility version
   * @param buffer the input char array to reverse
   * @param start the offset from where to reverse the buffer
   * @param len the length in the buffer up to where the
   *        buffer should be reversed
   */
-  public static void reverse(final char[] buffer,
+  public static void reverse(Version matchVersion, final char[] buffer,
      final int start, final int len) {
    /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
    if (len < 2)
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
@ -40,6 +40,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory {
  /** Creates a new ReverseStringFilterFactory */
  public ReverseStringFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -47,7 +48,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory {
  
  @Override
  public ReverseStringFilter create(TokenStream in) {
-    return new ReverseStringFilter(in);
+    return new ReverseStringFilter(luceneMatchVersion,in);
  }
 }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@ -78,17 +78,18 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public RomanianAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public RomanianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public RomanianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -96,12 +97,14 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -118,10 +121,10 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new RomanianStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@ -54,7 +54,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
      static {
        try {
          DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-              DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+              DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
        } catch (IOException ex) {
          // default set should always be present as it is part of the
          // distribution (JAR)
@ -74,30 +74,34 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
      return DefaultSetHolder.DEFAULT_STOP_SET;
    }

-    public RussianAnalyzer() {
-      this(DefaultSetHolder.DEFAULT_STOP_SET);
+    public RussianAnalyzer(Version matchVersion) {
+      this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
    }
  
    /**
     * Builds an analyzer with the given stop words
     * 
+     * @param matchVersion
+     *          lucene compatibility version
     * @param stopwords
     *          a stopword set
     */
-    public RussianAnalyzer(CharArraySet stopwords) {
-      this(stopwords, CharArraySet.EMPTY_SET);
+    public RussianAnalyzer(Version matchVersion, CharArraySet stopwords){
+      this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
    }
    
    /**
     * Builds an analyzer with the given stop words
     * 
+     * @param matchVersion
+     *          lucene compatibility version
     * @param stopwords
     *          a stopword set
     * @param stemExclusionSet a set of words not to be stemmed
     */
-    public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-      super(stopwords);
-      this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+    public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
+      super(matchVersion, stopwords);
+      this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
    }
   
  /**
@ -113,10 +117,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
   */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
-      final Tokenizer source = new StandardTokenizer();
-      TokenStream result = new StandardFilter(source);
-      result = new LowerCaseFilter(result);
-      result = new StopFilter(result, stopwords);
+      final Tokenizer source = new StandardTokenizer(matchVersion);
+      TokenStream result = new StandardFilter(matchVersion, source);
+      result = new LowerCaseFilter(matchVersion, result);
+      result = new StopFilter(matchVersion, result, stopwords);
      if (!stemExclusionSet.isEmpty()) 
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.shingle;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.util.Version;

 /**
 * A ShingleAnalyzerWrapper wraps a {@link ShingleFilter} around another {@link Analyzer}.
@ -100,15 +101,15 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
  /**
   * Wraps {@link StandardAnalyzer}. 
   */
-  public ShingleAnalyzerWrapper() {
-    this(ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+  public ShingleAnalyzerWrapper(Version matchVersion) {
+    this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
  }

  /**
   * Wraps {@link StandardAnalyzer}. 
   */
-  public ShingleAnalyzerWrapper(int minShingleSize, int maxShingleSize) {
-    this(new StandardAnalyzer(), minShingleSize, maxShingleSize);
+  public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) {
+    this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize);
  }

  /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@ -17,14 +17,16 @@ package org.apache.lucene.analysis.standard;
 * limitations under the License.
 */

-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;

+import java.io.File;
 import java.io.IOException;
 import java.io.Reader;

@ -33,6 +35,18 @@ import java.io.Reader;
 * LowerCaseFilter} and {@link StopFilter}, using a list of
 * English stop words.
 *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ClassicAnalyzer:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *         supplementary characters in stopwords
+ *   <li> As of 2.9, StopFilter preserves position
+ *        increments
+ *   <li> As of 2.4, Tokens incorrectly identified as acronyms
+ *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+ * </ul>
+ * 
 * ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
 * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
 * as specified by UAX#29.
@ -49,23 +63,29 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 

  /** Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopWords stop words */
-  public ClassicAnalyzer(CharArraySet stopWords) {
-    super(stopWords);
+  public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) {
+    super(matchVersion, stopWords);
  }

  /** Builds an analyzer with the default stop words ({@link
   * #STOP_WORDS_SET}).
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   */
-  public ClassicAnalyzer() {
-    this(STOP_WORDS_SET);
+  public ClassicAnalyzer(Version matchVersion) {
+    this(matchVersion, STOP_WORDS_SET);
  }

  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader)
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopwords Reader to read stop words from */
-  public ClassicAnalyzer(Reader stopwords) throws IOException {
-    this(loadStopwordSet(stopwords));
+  public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, loadStopwordSet(stopwords, matchVersion));
  }

  /**
@ -87,11 +107,11 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
-    final ClassicTokenizer src = new ClassicTokenizer();
+    final ClassicTokenizer src = new ClassicTokenizer(matchVersion);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new ClassicFilter(src);
-    tok = new LowerCaseFilter(tok);
-    tok = new StopFilter(tok, stopwords);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
@ -18,6 +18,7 @@
 package org.apache.lucene.analysis.standard;

 import java.io.IOException;
+import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -25,6 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /** A grammar-based tokenizer constructed with JFlex
 *
@ -99,19 +102,19 @@ public final class ClassicTokenizer extends Tokenizer {
   *
   * See http://issues.apache.org/jira/browse/LUCENE-1068
   */
-  public ClassicTokenizer() {
-    init();
+  public ClassicTokenizer(Version matchVersion) {
+    init(matchVersion);
  }

  /**
   * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeFactory} 
   */
-  public ClassicTokenizer(AttributeFactory factory) {
+  public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
    super(factory);
-    init();
+    init(matchVersion);
  }

-  private void init() {
+  private void init(Version matchVersion) {
    this.scanner = new ClassicTokenizerImpl(input);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
@ -37,6 +37,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory {
  /** Creates a new ClassicTokenizerFactory */
  public ClassicTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
@ -45,7 +46,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory {

  @Override
  public ClassicTokenizer create(AttributeFactory factory) {
-    ClassicTokenizer tokenizer = new ClassicTokenizer(factory);
+    ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@ -17,14 +17,16 @@ package org.apache.lucene.analysis.standard;
 * limitations under the License.
 */

-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;

+import java.io.File;
 import java.io.IOException;
 import java.io.Reader;

@ -32,6 +34,23 @@ import java.io.Reader;
 * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
 * LowerCaseFilter} and {@link StopFilter}, using a list of
 * English stop words.
+ *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StandardAnalyzer:
+ * <ul>
+ *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ *        from their combining characters. If you use a previous version number,
+ *        you get the exact broken behavior for backwards compatibility.
+ *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
+ *        and StopFilter correctly handles Unicode 4.0 supplementary characters
+ *        in stopwords.  {@link ClassicTokenizer} and {@link ClassicAnalyzer} 
+ *        are the pre-3.1 implementations of StandardTokenizer and
+ *        StandardAnalyzer.
+ *   <li> As of 2.9, StopFilter preserves position increments
+ *   <li> As of 2.4, Tokens incorrectly identified as acronyms
+ *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+ * </ul>
 */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {

@ -45,22 +64,29 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 

  /** Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopWords stop words */
-  public StandardAnalyzer(CharArraySet stopWords) {
-    super(stopWords);
+  public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) {
+    super(matchVersion, stopWords);
  }

-  /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
+  /** Builds an analyzer with the default stop words ({@link
+   * #STOP_WORDS_SET}).
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   */
-  public StandardAnalyzer() {
-    this(STOP_WORDS_SET);
+  public StandardAnalyzer(Version matchVersion) {
+    this(matchVersion, STOP_WORDS_SET);
  }

  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader)
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopwords Reader to read stop words from */
-  public StandardAnalyzer(Reader stopwords) throws IOException {
-    this(loadStopwordSet(stopwords));
+  public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, loadStopwordSet(stopwords, matchVersion));
  }

  /**
@ -82,11 +108,11 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
-    final StandardTokenizer src = new StandardTokenizer();
+    final StandardTokenizer src = new StandardTokenizer(matchVersion);
    src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new StandardFilter(src);
-    tok = new LowerCaseFilter(tok);
-    tok = new StopFilter(tok, stopwords);
+    TokenStream tok = new StandardFilter(matchVersion, src);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@ -21,13 +21,14 @@ import java.io.IOException;

 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;

 /**
 * Normalizes tokens extracted with {@link StandardTokenizer}.
 */
 public class StandardFilter extends TokenFilter {
  
-  public StandardFilter(TokenStream in) {
+  public StandardFilter(Version matchVersion, TokenStream in) {
    super(in);
  }
  
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
@ -38,6 +38,7 @@ public class StandardFilterFactory extends TokenFilterFactory {
  /** Creates a new StandardFilterFactory */
  public StandardFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -45,6 +46,6 @@ public class StandardFilterFactory extends TokenFilterFactory {
  
  @Override
  public StandardFilter create(TokenStream input) {
-    return new StandardFilter(input);
+    return new StandardFilter(luceneMatchVersion, input);
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
@ -18,6 +18,7 @@
 package org.apache.lucene.analysis.standard;

 import java.io.IOException;
+import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -25,6 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /** A grammar-based tokenizer constructed with JFlex.
 * <p>
@ -113,19 +116,19 @@ public final class StandardTokenizer extends Tokenizer {

   * See http://issues.apache.org/jira/browse/LUCENE-1068
   */
-  public StandardTokenizer() {
-    init();
+  public StandardTokenizer(Version matchVersion) {
+    init(matchVersion);
  }

  /**
   * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory} 
   */
-  public StandardTokenizer(AttributeFactory factory) {
+  public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
    super(factory);
-    init();
+    init(matchVersion);
  }

-  private void init() {
+  private void init(Version matchVersion) {
    this.scanner = new StandardTokenizerImpl(input);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
@ -37,6 +37,7 @@ public class StandardTokenizerFactory extends TokenizerFactory {
  /** Creates a new StandardTokenizerFactory */
  public StandardTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
@ -45,7 +46,7 @@ public class StandardTokenizerFactory extends TokenizerFactory {

  @Override
  public StandardTokenizer create(AttributeFactory factory) {
-    StandardTokenizer tokenizer = new StandardTokenizer(factory);
+    StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@ -34,6 +34,12 @@ import java.io.Reader;
 * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and
 * {@link org.apache.lucene.analysis.core.StopFilter}, using a list of
 * English stop words.
+ *
+ * <a name="version"></a>
+ * <p>
+ *   You must specify the required {@link org.apache.lucene.util.Version}
+ *   compatibility when creating UAX29URLEmailAnalyzer
+ * </p>
 */
 public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {

@ -47,23 +53,29 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

  /** Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopWords stop words */
-  public UAX29URLEmailAnalyzer(CharArraySet stopWords) {
-    super(stopWords);
+  public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) {
+    super(matchVersion, stopWords);
  }

  /** Builds an analyzer with the default stop words ({@link
   * #STOP_WORDS_SET}).
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   */
-  public UAX29URLEmailAnalyzer() {
-    this(STOP_WORDS_SET);
+  public UAX29URLEmailAnalyzer(Version matchVersion) {
+    this(matchVersion, STOP_WORDS_SET);
  }

  /** Builds an analyzer with the stop words from the given reader.
-   * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader)
+   * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version)
+   * @param matchVersion Lucene version to match; see
+   * <a href="#version">above</a>
   * @param stopwords Reader to read stop words from */
-  public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException {
-    this(loadStopwordSet(stopwords));
+  public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, loadStopwordSet(stopwords, matchVersion));
  }

  /**
@ -85,11 +97,11 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
-    final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer();
+    final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion);
    src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new StandardFilter(src);
-    tok = new LowerCaseFilter(tok);
-    tok = new StopFilter(tok, stopwords);
+    TokenStream tok = new StandardFilter(matchVersion, src);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@ -18,6 +18,9 @@ package org.apache.lucene.analysis.standard;
 */

 import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -25,6 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /**
 * This class implements Word Break rules from the Unicode Text Segmentation 
@ -95,19 +100,19 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
   * the <code>input</code> to the newly created JFlex scanner.

   */
-  public UAX29URLEmailTokenizer() {
-    this.scanner = getScanner();
+  public UAX29URLEmailTokenizer(Version matchVersion) {
+    this.scanner = getScannerFor(matchVersion);
  }

  /**
   * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory} 
   */
-  public UAX29URLEmailTokenizer(AttributeFactory factory) {
+  public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory) {
    super(factory);
-    this.scanner = getScanner();
+    this.scanner = getScannerFor(matchVersion);
  }

-  private StandardTokenizerInterface getScanner() {
+  private StandardTokenizerInterface getScannerFor(Version matchVersion) {
    return new UAX29URLEmailTokenizerImpl(input);
  }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
@ -38,6 +38,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
  /** Creates a new UAX29URLEmailTokenizerFactory */
  public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
@ -46,7 +47,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {

  @Override
  public UAX29URLEmailTokenizer create(AttributeFactory factory) {
-    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(factory);
+    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.SwedishStemmer;

 /**
@ -63,7 +64,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, 
-            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
+            DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
@ -75,17 +76,18 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
-  public SwedishAnalyzer() {
-    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  public SwedishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  
  /**
   * Builds an analyzer with the given stop words.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
-  public SwedishAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
@ -93,12 +95,14 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   * 
+   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
-  public SwedishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -115,10 +119,10 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new SwedishStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
@ -134,8 +134,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
      analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
-          Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create();
-          TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
+          Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT) : factory.create();
+          TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
          return new TokenStreamComponents(tokenizer, stream);
        }
      };
@ -202,12 +202,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
  private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
    Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
    try {
-      Analyzer analyzer = null;
-      try {
-        analyzer = clazz.getConstructor().newInstance();
-      } catch (NoSuchMethodException e) {
-        analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT);
-      }
+      Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT);
      if (analyzer instanceof ResourceLoaderAware) {
        ((ResourceLoaderAware) analyzer).inform(loader);
      }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -34,7 +35,6 @@ import org.apache.lucene.util.Version;
 * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
 */
 public final class ThaiAnalyzer extends StopwordAnalyzerBase {
-  private final Version matchVersion;
  
  /** File containing default Thai stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -87,8 +87,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
   * @param stopwords a stopword set
   */
  public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) {
-    super(stopwords);
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
  }

  /**
@ -105,15 +104,15 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
  protected TokenStreamComponents createComponents(String fieldName) {
    if (matchVersion.onOrAfter(Version.LUCENE_4_8)) {
      final Tokenizer source = new ThaiTokenizer();
-      TokenStream result = new LowerCaseFilter(source);
-      result = new StopFilter(result, stopwords);
+      TokenStream result = new LowerCaseFilter(matchVersion, source);
+      result = new StopFilter(matchVersion, result, stopwords);
      return new TokenStreamComponents(source, result);
    } else {
-      final Tokenizer source = new StandardTokenizer();
-      TokenStream result = new StandardFilter(source);
-      result = new LowerCaseFilter(result);
-      result = new ThaiWordFilter(result);
-      return new TokenStreamComponents(source, new StopFilter(result, stopwords));
+      final Tokenizer source = new StandardTokenizer(matchVersion);
+      TokenStream result = new StandardFilter(matchVersion, source);
+      result = new LowerCaseFilter(matchVersion, result);
+      result = new ThaiWordFilter(matchVersion, result);
+      return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
    }
  }
 }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@ -28,6 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.util.CharArrayIterator;
 import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;

 /**
 * {@link TokenFilter} that use {@link java.text.BreakIterator} to break each 
@ -60,7 +61,7 @@ public final class ThaiWordFilter extends TokenFilter {
  private boolean hasIllegalOffsets = false; // only if the length changed before this filter

  /** Creates a new ThaiWordFilter with the specified match version. */
-  public ThaiWordFilter(TokenStream input) {
+  public ThaiWordFilter(Version matchVersion, TokenStream input) {
    super(input);
    if (!DBBI_AVAILABLE)
      throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
@ -41,6 +41,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory {
  /** Creates a new ThaiWordFilterFactory */
  public ThaiWordFilterFactory(Map<String,String> args) {
    super(args);
+    assureMatchVersion();
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -48,7 +49,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory {
  
  @Override
  public ThaiWordFilter create(TokenStream input) {
-    return new ThaiWordFilter(input);
+    return new ThaiWordFilter(luceneMatchVersion, input);
  }
 }

--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@ -38,7 +38,6 @@ import org.tartarus.snowball.ext.TurkishStemmer;
 */
 public final class TurkishAnalyzer extends StopwordAnalyzerBase {
  private final CharArraySet stemExclusionSet;
-  private final Version matchVersion;
  
  /** File containing default Turkish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -102,9 +101,9 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
-    super(stopwords);
-    this.matchVersion = matchVersion;
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+    super(matchVersion, stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
  }

  /**
@ -121,12 +120,12 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
+    final Tokenizer source = new StandardTokenizer(matchVersion);
+    TokenStream result = new StandardFilter(matchVersion, source);
    if(matchVersion.onOrAfter(Version.LUCENE_4_8))
      result = new ApostropheFilter(result);
    result = new TurkishLowerCaseFilter(result);
-    result = new StopFilter(result, stopwords);
+    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new TurkishStemmer());
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
@ -238,10 +238,12 @@ public abstract class AbstractAnalysisFactory {
    if (files.size() > 0) {
      // default stopwords list has 35 or so words, but maybe don't make it that
      // big to start
-      words = new CharArraySet(files.size() * 10, ignoreCase);
+      words = new CharArraySet(luceneMatchVersion,
+          files.size() * 10, ignoreCase);
      for (String file : files) {
        List<String> wlist = getLines(loader, file.trim());
-        words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
+        words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
+            ignoreCase));
      }
    }
    return words;
@ -264,7 +266,8 @@ public abstract class AbstractAnalysisFactory {
    if (files.size() > 0) {
      // default stopwords list has 35 or so words, but maybe don't make it that
      // big to start
-      words = new CharArraySet(files.size() * 10, ignoreCase);
+      words = new CharArraySet(luceneMatchVersion,
+          files.size() * 10, ignoreCase);
      for (String file : files) {
        InputStream stream = null;
        Reader reader = null;
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
@ -25,6 +25,8 @@ import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;
+

 /**
 * A simple class that stores key Strings as char[]'s in a
@ -34,6 +36,19 @@ import org.apache.lucene.analysis.util.CharacterUtils;
 * etc.  It is designed to be quick to retrieve items
 * by char[] keys without the necessity of converting
 * to a String first.
+ *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArrayMap}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *       properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be
+ * lowercased correctly due to the lack of Unicode 4
+ * support in JDK 1.4. To use instances of
+ * {@link CharArrayMap} with the behavior before Lucene
+ * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
 */
 public class CharArrayMap<V> extends AbstractMap<Object,V> {
  // private only because missing generics
@ -43,12 +58,16 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
  private final CharacterUtils charUtils;
  private boolean ignoreCase;  
  private int count;
+  final Version matchVersion; // package private because used in CharArraySet
  char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
  V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator

  /**
   * Create map with enough capacity to hold startSize terms
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
   * @param startSize
   *          the initial capacity
   * @param ignoreCase
@ -56,27 +75,31 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
   *          otherwise <code>true</code>.
   */
  @SuppressWarnings("unchecked")
-  public CharArrayMap(int startSize, boolean ignoreCase) {
+  public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
    this.ignoreCase = ignoreCase;
    int size = INIT_SIZE;
    while(startSize + (startSize>>2) > size)
      size <<= 1;
    keys = new char[size][];
    values = (V[]) new Object[size];
-    this.charUtils = CharacterUtils.getInstance();
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+    this.matchVersion = matchVersion;
  }

  /**
   * Creates a map from the mappings in another map. 
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
   * @param c
   *          a map whose mappings to be copied
   * @param ignoreCase
   *          <code>false</code> if and only if the set should be case sensitive
   *          otherwise <code>true</code>.
   */
-  public CharArrayMap(Map<?,? extends V> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
+  public CharArrayMap(Version matchVersion, Map<?,? extends V> c, boolean ignoreCase) {
+    this(matchVersion, c.size(), ignoreCase);
    putAll(c);
  }
  
@ -87,6 +110,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
    this.ignoreCase = toCopy.ignoreCase;
    this.count = toCopy.count;
    this.charUtils = toCopy.charUtils;
+    this.matchVersion = toCopy.matchVersion;
  }
  
  /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
@ -541,7 +565,18 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
  /**
   * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
   * is a {@link CharArrayMap} the ignoreCase property will be preserved.
+   * <p>
+   * <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
+   * the {@link Version} of the source map differs from its copy,
+   * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
+   * {@link #copy(Version, Map)} will preserve the {@link Version} of the
+   * source map if it is an instance of {@link CharArrayMap}.
+   * </p>
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details. This argument will be ignored if the
+   *          given map is a {@link CharArrayMap}.
   * @param map
   *          a map to copy
   * @return a copy of the given map as a {@link CharArrayMap}. If the given map
@ -549,7 +584,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
   *         matchVersion will be of the given map will be preserved.
   */
  @SuppressWarnings("unchecked")
-  public static <V> CharArrayMap<V> copy(final Map<?,? extends V> map) {
+  public static <V> CharArrayMap<V> copy(final Version matchVersion, final Map<?,? extends V> map) {
    if(map == EMPTY_MAP)
      return emptyMap();
    if(map instanceof CharArrayMap) {
@ -565,7 +600,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
      m.values = values;
      return m;
    }
-    return new CharArrayMap<>(map, false);
+    return new CharArrayMap<>(matchVersion, map, false);
  }
  
  /** Returns an empty, unmodifiable map. */
@ -624,7 +659,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
   */
  private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
    EmptyCharArrayMap() {
-      super(new CharArrayMap<V>(0, false));
+      super(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false));
    }
    
    @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
@ -22,6 +22,9 @@ import java.util.Collection;
 import java.util.Iterator;
 import java.util.Set;

+import org.apache.lucene.util.Version;
+
+
 /**
 * A simple class that stores Strings as char[]'s in a
 * hash table.  Note that this is not a general purpose
@ -31,6 +34,18 @@ import java.util.Set;
 * is in the set without the necessity of converting it
 * to a String first.
 *
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArraySet}:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are
+ *       properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be
+ * lowercased correctly due to the lack of Unicode 4
+ * support in JDK 1.4. To use instances of
+ * {@link CharArraySet} with the behavior before Lucene
+ * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
 * <P>
 * <em>Please note:</em> This class implements {@link java.util.Set Set} but
 * does not behave like it should in all cases. The generic type is
@ -49,27 +64,33 @@ public class CharArraySet extends AbstractSet<Object> {
  /**
   * Create set with enough capacity to hold startSize terms
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
   * @param startSize
   *          the initial capacity
   * @param ignoreCase
   *          <code>false</code> if and only if the set should be case sensitive
   *          otherwise <code>true</code>.
   */
-  public CharArraySet(int startSize, boolean ignoreCase) {
-    this(new CharArrayMap<>(startSize, ignoreCase));
+  public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
+    this(new CharArrayMap<>(matchVersion, startSize, ignoreCase));
  }

  /**
   * Creates a set from a Collection of objects. 
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details.
   * @param c
   *          a collection whose elements to be placed into the set
   * @param ignoreCase
   *          <code>false</code> if and only if the set should be case sensitive
   *          otherwise <code>true</code>.
   */
-  public CharArraySet(Collection<?> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
+  public CharArraySet(Version matchVersion, Collection<?> c, boolean ignoreCase) {
+    this(matchVersion, c.size(), ignoreCase);
    addAll(c);
  }

@ -151,21 +172,32 @@ public class CharArraySet extends AbstractSet<Object> {
  /**
   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
   * is a {@link CharArraySet} the ignoreCase property will be preserved.
+   * <p>
+   * <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
+   * the {@link Version} of the source set differs from its copy,
+   * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
+   * {@link #copy(Version, Set)} will preserve the {@link Version} of the
+   * source set if it is an instance of {@link CharArraySet}.
+   * </p>
   * 
+   * @param matchVersion
+   *          compatibility match version see <a href="#version">Version
+   *          note</a> above for details. This argument will be ignored if the
+   *          given set is a {@link CharArraySet}.
   * @param set
   *          a set to copy
   * @return a copy of the given set as a {@link CharArraySet}. If the given set
   *         is a {@link CharArraySet} the ignoreCase property as well as the
   *         matchVersion will be of the given set will be preserved.
   */
-  public static CharArraySet copy(final Set<?> set) {
+  public static CharArraySet copy(final Version matchVersion, final Set<?> set) {
    if(set == EMPTY_SET)
      return EMPTY_SET;
    if(set instanceof CharArraySet) {
      final CharArraySet source = (CharArraySet) set;
-      return new CharArraySet(CharArrayMap.copy(source.map));
+      return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
    }
-    return new CharArraySet(set, false);
+    return new CharArraySet(matchVersion, set, false);
  }
  
  /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@ -18,12 +18,15 @@ package org.apache.lucene.analysis.util;
 */

 import java.io.IOException;
+import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;

 /**
@ -33,18 +36,25 @@ public abstract class CharTokenizer extends Tokenizer {
  
  /**
   * Creates a new {@link CharTokenizer} instance
+   * 
+   * @param matchVersion
+   *          Lucene version to match
   */
-  public CharTokenizer() {
+  public CharTokenizer(Version matchVersion) {
+    charUtils = CharacterUtils.getInstance(matchVersion);
  }
  
  /**
   * Creates a new {@link CharTokenizer} instance
   * 
+   * @param matchVersion
+   *          Lucene version to match
   * @param factory
   *          the attribute factory to use for this {@link Tokenizer}
   */
-  public CharTokenizer(AttributeFactory factory) {
+  public CharTokenizer(Version matchVersion, AttributeFactory factory) {
    super(factory);
+    charUtils = CharacterUtils.getInstance(matchVersion);
  }
  
  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
@ -54,7 +64,7 @@ public abstract class CharTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
+  private final CharacterUtils charUtils;
  private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
  
  /**
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
@ -34,25 +34,29 @@ public abstract class CharacterUtils {
  private static final Java5CharacterUtils JAVA_5 = new Java5CharacterUtils();

  /**
-   * Returns a {@link CharacterUtils} implementation.
+   * Returns a {@link CharacterUtils} implementation according to the given
+   * {@link Version} instance.
+   * 
+   * @param matchVersion
+   *          a version instance
   * @return a {@link CharacterUtils} implementation according to the given
   *         {@link Version} instance.
   */
-  public static CharacterUtils getInstance() {
+  public static CharacterUtils getInstance(final Version matchVersion) {
    return JAVA_5;
  }
  
-  /** 
-   * explicitly returns a version matching java 4 semantics 
-   * @deprecated Only for n-gram backwards compat
-   */
-  @Deprecated
+  /** explicitly returns a version matching java 4 semantics */
  public static CharacterUtils getJava4Instance() {
    return JAVA_4;
  }

  /**
   * Returns the code point at the given index of the {@link CharSequence}.
+   * Depending on the {@link Version} passed to
+   * {@link CharacterUtils#getInstance(Version)}, this method mimics the behavior
+   * of {@link Character#codePointAt(CharSequence, int)} as it would have been
+   * available on a Java 1.4 JVM or on a later virtual machine version.
   * 
   * @param seq
   *          a character sequence
@ -71,6 +75,10 @@ public abstract class CharacterUtils {
  /**
   * Returns the code point at the given index of the char array where only elements
   * with index less than the limit are used.
+   * Depending on the {@link Version} passed to
+   * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior
+   * of {@link Character#codePointAt(char[], int)} as it would have been
+   * available on a Java 1.4 JVM or on a later virtual machine version.
   * 
   * @param chars
   *          a character array
@ -180,7 +188,10 @@ public abstract class CharacterUtils {
   * the middle of a surrogate pair, even if there are remaining characters in
   * the {@link Reader}.
   * <p>
-   * This method guarantees
+   * Depending on the {@link Version} passed to
+   * {@link CharacterUtils#getInstance(Version)} this method implements
+   * supplementary character awareness when filling the given buffer. For all
+   * {@link Version} &gt; 3.0 {@link #fill(CharacterBuffer, Reader, int)} guarantees
   * that the given {@link CharacterBuffer} will never contain a high surrogate
   * character as the last element in the buffer unless it is the last available
   * character in the reader. In other words, high and low surrogate pairs will
--- a/Show More
+++ b/Show More