*/
public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
+
/**
* File containing default Bulgarian stopwords.
*
@@ -84,15 +83,15 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public BulgarianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BulgarianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*/
- public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public BulgarianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -100,10 +99,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
* If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter}
* before {@link BulgarianStemFilter}.
*/
- public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet)); }
+ public BulgarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+ }
/**
* Creates a
@@ -119,10 +118,10 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
public TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new BulgarianStemFilter(result);
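The constructor change above repeats for every analyzer in this patch: the Version argument disappears and the token-stream chain is built version-free. A minimal usage sketch under that assumption (the field name and sample text are illustrative only):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class BulgarianAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    // before: new BulgarianAnalyzer(Version.LUCENE_4_9); after: no Version at all
    try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer()) {
      TokenStream ts = analyzer.tokenStream("body", "Това е примерен текст");
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();
      ts.close();
    }
  }
}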
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index cddd3920c24..3c4decb0461 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Brazilian Portuguese language.
@@ -44,7 +43,7 @@ import org.apache.lucene.util.Version;
* not be stemmed, but indexed).
*
*
- * NOTE: This class uses the same {@link Version}
+ * NOTE: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.
*/
public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
@@ -65,7 +64,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -83,35 +82,29 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*/
- public BrazilianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BrazilianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- super(matchVersion, stopwords);
+ public BrazilianAnalyzer(CharArraySet stopwords) {
+ super(stopwords);
}
/**
* Builds an analyzer with the given stop words and stemming exclusion words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
- CharArraySet stemExclusionSet) {
- this(matchVersion, stopwords);
- excltable = CharArraySet.unmodifiableSet(CharArraySet
- .copy(matchVersion, stemExclusionSet));
+ public BrazilianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ this(stopwords);
+ excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -126,10 +119,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
- result = new StandardFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
+ result = new StandardFilter(result);
+ result = new StopFilter(result, stopwords);
if(excltable != null && !excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index 342348204a8..61ca46bb8a1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
@@ -46,7 +45,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"d", "l", "m", "n", "s", "t"
), true));
@@ -81,18 +80,17 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public CatalanAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public CatalanAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public CatalanAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -100,14 +98,12 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -124,11 +120,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new CatalanStemmer());
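Where an analyzer takes a stem-exclusion set, the CharArraySet copy is likewise version-free now. A short sketch of the two-set constructor, assuming the standard getDefaultStopSet() accessor (the excluded term is made up):

import java.util.Arrays;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

public class CatalanExclusionDemo {
  public static void main(String[] args) {
    // terms in the exclusion set are marked as keywords and skip the Snowball stemmer
    CharArraySet noStem = new CharArraySet(Arrays.asList("barcelona"), true);
    CatalanAnalyzer analyzer =
        new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), noStem);
    System.out.println(analyzer);
  }
}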
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index b10e1797863..ad304545195 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -26,7 +26,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -29841,7 +29840,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
upperCaseVariantsAccepted.put("amp", "AMP");
}
private static final CharArrayMap<String> entityValues
- = new CharArrayMap<>(Version.LUCENE_CURRENT, 253, false);
+ = new CharArrayMap<>(253, false);
static {
String[] entities = {
"AElig", "\u00C6", "Aacute", "\u00C1", "Acirc", "\u00C2",
@@ -29980,7 +29979,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+ this.escapedTags = new CharArraySet(16, true);
}
this.escapedTags.add(tag);
}
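Callers still hand escaped tags in as a plain Set<String>; only the internal CharArraySet copy above loses its Version. A small sketch of the public constructor that takes the set (tag name and input are illustrative):

import java.io.Reader;
import java.io.StringReader;
import java.util.Collections;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;

public class HtmlStripDemo {
  public static void main(String[] args) throws Exception {
    // tags in the escaped set are left in the output instead of being stripped
    try (Reader in = new HTMLStripCharFilter(
        new StringReader("<b>bold</b> and <pre>kept</pre>"),
        Collections.singleton("pre"))) {
      int c;
      while ((c = in.read()) != -1) {
        System.out.print((char) c);
      }
    }
  }
}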
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index 4ec0785f6f3..8c34577d8a6 100755
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -24,7 +24,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
@@ -195,7 +194,7 @@ InlineElment = ( [aAbBiIqQsSuU] |
escapeSTYLE = true;
} else {
if (null == this.escapedTags) {
- this.escapedTags = new CharArraySet(Version.LUCENE_CURRENT, 16, true);
+ this.escapedTags = new CharArraySet(16, true);
}
this.escapedTags.add(tag);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index 958974c0dda..dda8e939d17 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -28,7 +27,6 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
@@ -37,6 +35,7 @@ import org.apache.lucene.util.Version;
* and filters stopwords with {@link StopFilter}
*/
public final class CJKAnalyzer extends StopwordAnalyzerBase {
+
/**
* File containing default CJK stopwords.
*
@@ -70,29 +69,27 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
*/
- public CJKAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public CJKAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
- super(matchVersion, stopwords);
+ public CJKAnalyzer(CharArraySet stopwords){
+ super(stopwords);
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
+ final Tokenizer source = new StandardTokenizer();
// run the widthfilter first before bigramming, it sometimes combines characters.
TokenStream result = new CJKWidthFilter(source);
- result = new LowerCaseFilter(matchVersion, result);
+ result = new LowerCaseFilter(result);
result = new CJKBigramFilter(result);
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
}
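With the Version parameter gone, the CJK chain is fixed: width normalization, lower-casing, bigramming, then stop filtering. A hedged sketch of what that chain emits (the expected bigrams follow from CJKBigramFilter pairing adjacent CJK characters):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CJKDemo {
  public static void main(String[] args) throws Exception {
    try (CJKAnalyzer analyzer = new CJKAnalyzer()) {
      TokenStream ts = analyzer.tokenStream("f", "東京都");
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term); // adjacent CJK characters come out as bigrams: 東京, 京都
      }
      ts.end();
      ts.close();
    }
  }
}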
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
index 8a89ae5a978..edee99c5636 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Sorani Kurdish.
@@ -62,7 +61,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SoraniAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public SoraniAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public SoraniAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public SoraniAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -118,11 +114,11 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new SoraniNormalizationFilter(result);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SoraniStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
index 052878028e3..d04e1b8e525 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/*
* TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
@@ -78,7 +77,7 @@ public final class CommonGramsFilter extends TokenFilter {
* @param input TokenStream input in filter chain
* @param commonWords The set of common words.
*/
- public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
+ public CommonGramsFilter(TokenStream input, CharArraySet commonWords) {
super(input);
this.commonWords = commonWords;
}
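The constructor shown above never stored the Version it received, so dropping the parameter changes no behavior. A sketch of the new signature in a hand-built chain, assuming the version-free no-arg WhitespaceTokenizer from this same change (the common-word list is illustrative):

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class CommonGramsDemo {
  public static void main(String[] args) throws Exception {
    CharArraySet common = new CharArraySet(Arrays.asList("the", "of"), true);
    WhitespaceTokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("the quick fox"));
    TokenStream ts = new CommonGramsFilter(tok, common);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term); // common words come out twice: "the", "the_quick", "quick", "fox"
    }
    ts.end();
    ts.close();
  }
}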
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
index 637568e8008..82765f45f69 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
@@ -76,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
@Override
public TokenFilter create(TokenStream input) {
- CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+ CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
return commonGrams;
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 60866579ad7..b6718afdc9b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -17,31 +17,18 @@ package org.apache.lucene.analysis.compound;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.LinkedList;
-
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+import java.util.LinkedList;
/**
* Base class for decomposition token filters.
- *
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- *
- * As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- * As of 4.4, {@link CompoundWordTokenFilterBase} doesn't update offsets.
- *
public abstract class CompoundWordTokenFilterBase extends TokenFilter {
/**
@@ -59,31 +46,29 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
*/
public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
- protected final Version matchVersion;
protected final CharArraySet dictionary;
protected final LinkedList<CompoundToken> tokens;
protected final int minWordSize;
protected final int minSubwordSize;
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
-
+
protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- private AttributeSource.State current;
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
- this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+ private State current;
+
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
}
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) {
- this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
}
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ protected CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(input);
- this.matchVersion = matchVersion;
this.tokens=new LinkedList<>();
if (minWordSize < 0) {
throw new IllegalArgumentException("minWordSize cannot be negative");
@@ -100,7 +85,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
this.onlyLongestMatch=onlyLongestMatch;
this.dictionary = dictionary;
}
-
+
@Override
public final boolean incrementToken() throws IOException {
if (!tokens.isEmpty()) {
@@ -141,7 +126,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
tokens.clear();
current = null;
}
-
+
/**
* Helper class to hold decompounded token information
*/
@@ -154,20 +139,8 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
// offsets of the original word
- int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
- int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
-
- if (matchVersion.onOrAfter(Version.LUCENE_4_4) ||
- endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
- // if length by start + end offsets doesn't match the term text then assume
- // this is a synonym and don't adjust the offsets.
- this.startOffset = startOff;
- this.endOffset = endOff;
- } else {
- final int newStart = startOff + offset;
- this.startOffset = newStart;
- this.endOffset = newStart + length;
- }
+ this.startOffset = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
+ this.endOffset = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
}
}
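The CompoundToken change above is the behavioral core of this hunk: on 4.4+ every decompounded subword now simply reports the start/end offsets of the whole original word, and the old slice-offset arithmetic moves to the deprecated Lucene43 base class added later in this patch. A sketch that makes the new behavior visible (dictionary and input follow the Donaudampfschiff example in the next file; the no-arg WhitespaceTokenizer is assumed from this same change):

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class CompoundOffsetsDemo {
  public static void main(String[] args) throws Exception {
    CharArraySet dict = new CharArraySet(Arrays.asList("donau", "dampf", "schiff"), true);
    WhitespaceTokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("donaudampfschiff"));
    TokenStream ts = new DictionaryCompoundWordTokenFilter(tok, dict);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute off = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // every subword ("donau", "dampf", "schiff") now reports [0,16), the whole compound
      System.out.println(term + " [" + off.startOffset() + "," + off.endOffset() + ")");
    }
    ts.end();
    ts.close();
  }
}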
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index e7d697c10f0..34e19b7d305 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -18,60 +18,39 @@ package org.apache.lucene.analysis.compound;
*/
-import java.util.Set;
-
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
*
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- * "Donaudampfschiff" even when you only enter "schiff".
+ * "Donaudampfschiff" even when you only enter "schiff".
* It uses a brute-force algorithm to achieve this.
*
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- *
- * As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- *
*/
public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
-
+
/**
* Creates a new {@link DictionaryCompoundWordTokenFilter}
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param dictionary
* the word dictionary to match against.
*/
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) {
- super(matchVersion, input, dictionary);
+ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary) {
+ super(input, dictionary);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
}
-
+
/**
* Creates a new {@link DictionaryCompoundWordTokenFilter}
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param dictionary
* the word dictionary to match against.
* @param minWordSize
@@ -83,9 +62,9 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
* @param onlyLongestMatch
* Add only the longest matching subword to the stream
*/
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary,
- int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary,
+ int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
index 09770b8f3cf..8c88c08cedd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
@@ -22,12 +22,13 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
import java.util.Map;
import java.io.IOException;
/**
- * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ * Factory for {@link Lucene43DictionaryCompoundWordTokenFilter}.
*
* <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
@@ -50,9 +51,9 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
super(args);
assureMatchVersion();
dictFile = require(args, "dictionary");
- minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -67,8 +68,13 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
@Override
public TokenStream create(TokenStream input) {
// if the dictionary is null, it means it was empty
- return dictionary == null ? input : new DictionaryCompoundWordTokenFilter
- (luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ if (dictionary == null) {
+ return input;
+ }
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ }
+ return new Lucene43DictionaryCompoundWordTokenFilter(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
index 909b3805e67..674bd813b26 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -17,67 +17,47 @@ package org.apache.lucene.analysis.compound;
* limitations under the License.
*/
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
+import java.io.File;
+import java.io.IOException;
+
/**
- * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
- *
+ * A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
+ *
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
* "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
* grammar and a word dictionary to achieve this.
- *
- * You must specify the required {@link Version} compatibility when creating
- * CompoundWordTokenFilterBase:
- *
- * As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- * supplementary characters in strings and char arrays provided as compound word
- * dictionaries.
- *
*/
public class HyphenationCompoundWordTokenFilter extends
CompoundWordTokenFilterBase {
private HyphenationTree hyphenator;
/**
- * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+ * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param hyphenator
* the hyphenation pattern tree to use for hyphenation
* @param dictionary
* the word dictionary to match against.
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, CharArraySet dictionary) {
- this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary) {
+ this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
/**
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
+ *
* @param input
- * the {@link TokenStream} to process
+ * the {@link org.apache.lucene.analysis.TokenStream} to process
* @param hyphenator
* the hyphenation pattern tree to use for hyphenation
* @param dictionary
@@ -91,10 +71,10 @@ public class HyphenationCompoundWordTokenFilter extends
* @param onlyLongestMatch
* Add only the longest matching subword to the stream
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
- int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
+ int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
this.hyphenator = hyphenator;
@@ -103,36 +83,36 @@ public class HyphenationCompoundWordTokenFilter extends
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
*
- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
+ * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
* HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* null, minWordSize, minSubwordSize, maxSubwordSize }
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
- int maxSubwordSize) {
- this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
+ int maxSubwordSize) {
+ this(input, hyphenator, null, minWordSize, minSubwordSize,
maxSubwordSize, false);
}
-
+
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
*
- * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int)
- * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+ * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, int, int, int)
+ * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
*/
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator) {
- this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
+ public HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator) {
+ this(input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
DEFAULT_MAX_SUBWORD_SIZE);
}
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationFilename the filename of the XML grammar to load
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(String hyphenationFilename)
throws IOException {
@@ -141,10 +121,10 @@ public class HyphenationCompoundWordTokenFilter extends
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationFile the file of the XML grammar to load
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(File hyphenationFile)
throws IOException {
@@ -153,10 +133,10 @@ public class HyphenationCompoundWordTokenFilter extends
/**
* Create a hyphenator tree
- *
+ *
* @param hyphenationSource the InputSource pointing to the XML grammar
* @return An object representing the hyphenation patterns
- * @throws IOException If there is a low-level I/O error.
+ * @throws java.io.IOException If there is a low-level I/O error.
*/
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
throws IOException {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
index e1295c73f6f..d7e9b3368a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.compound;
* limitations under the License.
*/
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -28,10 +29,12 @@ import org.apache.lucene.util.IOUtils;
import java.util.Map;
import java.io.IOException;
import java.io.InputStream;
+
+import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
/**
- * Factory for {@link HyphenationCompoundWordTokenFilter}.
+ * Factory for {@link Lucene43HyphenationCompoundWordTokenFilter}.
*
* This factory accepts the following parameters:
*
@@ -55,7 +58,7 @@ import org.xml.sax.InputSource;
* </analyzer>
* </fieldType>
*
- * @see HyphenationCompoundWordTokenFilter
+ * @see Lucene43HyphenationCompoundWordTokenFilter
*/
public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
@@ -75,9 +78,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor
dictFile = get(args, "dictionary");
encoding = get(args, "encoding");
hypFile = require(args, "hyphenator");
- minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ minWordSize = getInt(args, "minWordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", Lucene43CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -96,14 +99,21 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor
final InputSource is = new InputSource(stream);
is.setEncoding(encoding); // if it's null let xml parser decide
is.setSystemId(hypFile);
- hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ } else {
+ hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+ }
} finally {
IOUtils.closeWhileHandlingException(stream);
}
}
@Override
- public HyphenationCompoundWordTokenFilter create(TokenStream input) {
- return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ }
+ return new Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}
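End to end, the hyphenation variant now wires up without a Version as well. A sketch, assuming a FOP/OFFO hyphenation grammar is available on disk ("de_DR.xml" is a stand-in path) and using getHyphenationTree(String) as shown above:

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;

public class HyphenationDemo {
  public static void main(String[] args) throws Exception {
    // the grammar proposes candidate split points; the dictionary confirms real words
    HyphenationTree hyphenator =
        HyphenationCompoundWordTokenFilter.getHyphenationTree("de_DR.xml");
    CharArraySet dict = new CharArraySet(Arrays.asList("dampf", "schiff"), true);
    WhitespaceTokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dampfschiff"));
    TokenStream ts = new HyphenationCompoundWordTokenFilter(tok, hyphenator, dict);
    ts.reset();
    while (ts.incrementToken()) { /* consume subwords */ }
    ts.end();
    ts.close();
  }
}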
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java
new file mode 100644
index 00000000000..e5b40703873
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43CompoundWordTokenFilterBase.java
@@ -0,0 +1,162 @@
+package org.apache.lucene.analysis.compound;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Base class for decomposition token filters using pre-4.4 behavior.
+ *
+ * @deprecated Use {@link CompoundWordTokenFilterBase}
+ */
+@Deprecated
+public abstract class Lucene43CompoundWordTokenFilterBase extends TokenFilter {
+ /**
+ * The default for minimal word length that gets decomposed
+ */
+ public static final int DEFAULT_MIN_WORD_SIZE = 5;
+
+ /**
+ * The default for minimal length of subwords that get propagated to the output of this filter
+ */
+ public static final int DEFAULT_MIN_SUBWORD_SIZE = 2;
+
+ /**
+ * The default for maximal length of subwords that get propagated to the output of this filter
+ */
+ public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
+
+ protected final CharArraySet dictionary;
+ protected final LinkedList<CompoundToken> tokens;
+ protected final int minWordSize;
+ protected final int minSubwordSize;
+ protected final int maxSubwordSize;
+ protected final boolean onlyLongestMatch;
+
+ protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+
+ private AttributeSource.State current;
+
+ protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
+ }
+
+ protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary) {
+ this(input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
+ }
+
+ protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input);
+ this.tokens=new LinkedList<>();
+ if (minWordSize < 0) {
+ throw new IllegalArgumentException("minWordSize cannot be negative");
+ }
+ this.minWordSize=minWordSize;
+ if (minSubwordSize < 0) {
+ throw new IllegalArgumentException("minSubwordSize cannot be negative");
+ }
+ this.minSubwordSize=minSubwordSize;
+ if (maxSubwordSize < 0) {
+ throw new IllegalArgumentException("maxSubwordSize cannot be negative");
+ }
+ this.maxSubwordSize=maxSubwordSize;
+ this.onlyLongestMatch=onlyLongestMatch;
+ this.dictionary = dictionary;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (!tokens.isEmpty()) {
+ assert current != null;
+ CompoundToken token = tokens.removeFirst();
+ restoreState(current); // keep all other attributes untouched
+ termAtt.setEmpty().append(token.txt);
+ offsetAtt.setOffset(token.startOffset, token.endOffset);
+ posIncAtt.setPositionIncrement(0);
+ return true;
+ }
+
+ current = null; // not really needed, but for safety
+ if (input.incrementToken()) {
+ // Only words longer than minWordSize get processed
+ if (termAtt.length() >= this.minWordSize) {
+ decompose();
+ // only capture the state if we really need it for producing new tokens
+ if (!tokens.isEmpty()) {
+ current = captureState();
+ }
+ }
+ // return original token:
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /** Decomposes the current {@link #termAtt} and places {@link CompoundToken} instances in the {@link #tokens} list.
+ * The original token may not be placed in the list, as it is automatically passed through this filter.
+ */
+ protected abstract void decompose();
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ tokens.clear();
+ current = null;
+ }
+
+ /**
+ * Helper class to hold decompounded token information
+ */
+ protected class CompoundToken {
+ public final CharSequence txt;
+ public final int startOffset, endOffset;
+
+ /** Construct the compound token based on a slice of the current {@link Lucene43CompoundWordTokenFilterBase#termAtt}. */
+ public CompoundToken(int offset, int length) {
+ this.txt = Lucene43CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
+
+ // offsets of the original word
+ int startOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
+ int endOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
+
+ if (endOff - startOff != Lucene43CompoundWordTokenFilterBase.this.termAtt.length()) {
+ // if length by start + end offsets doesn't match the term text then assume
+ // this is a synonym and don't adjust the offsets.
+ this.startOffset = startOff;
+ this.endOffset = endOff;
+ } else {
+ final int newStart = startOff + offset;
+ this.startOffset = newStart;
+ this.endOffset = newStart + length;
+ }
+ }
+
+ }
+}
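Subclasses only implement decompose(); the base class replays the queued CompoundTokens after the original token with position increment 0, as incrementToken() above shows. A hypothetical subclass sketch to illustrate the contract (the midpoint split is made up, not a real Lucene filter):

package org.apache.lucene.analysis.compound;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.CharArraySet;

// hypothetical: splits every processed term in half, just to show how tokens are queued
class MidpointCompoundFilter extends Lucene43CompoundWordTokenFilterBase {
  MidpointCompoundFilter(TokenStream input, CharArraySet dictionary) {
    super(input, dictionary);
  }

  @Override
  protected void decompose() {
    final int len = termAtt.length(); // >= minWordSize when decompose() is called
    // queue subtokens only; the original term is passed through automatically
    tokens.add(new CompoundToken(0, len / 2));
    tokens.add(new CompoundToken(len / 2, len - len / 2));
  }
}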
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java
new file mode 100644
index 00000000000..ec856e1924c
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43DictionaryCompoundWordTokenFilter.java
@@ -0,0 +1,100 @@
+package org.apache.lucene.analysis.compound;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+/**
+ * A {@link TokenFilter} that decomposes compound words found in many Germanic languages, using
+ * pre-4.4 behavior.
+ * @deprecated Use {@link DictionaryCompoundWordTokenFilter}.
+ */
+@Deprecated
+public class Lucene43DictionaryCompoundWordTokenFilter extends Lucene43CompoundWordTokenFilterBase {
+
+ /**
+ * Creates a new {@link Lucene43DictionaryCompoundWordTokenFilter}
+ *
+ * @param input
+ * the {@link TokenStream} to process
+ * @param dictionary
+ * the word dictionary to match against.
+ */
+ public Lucene43DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary) {
+ super(input, dictionary);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("dictionary cannot be null");
+ }
+ }
+
+ /**
+ * Creates a new {@link Lucene43DictionaryCompoundWordTokenFilter}
+ *
+ * @param input
+ * the {@link TokenStream} to process
+ * @param dictionary
+ * the word dictionary to match against.
+ * @param minWordSize
+ * only words longer than this get processed
+ * @param minSubwordSize
+ * only subwords longer than this get to the output stream
+ * @param maxSubwordSize
+ * only subwords shorter than this get to the output stream
+ * @param onlyLongestMatch
+ * Add only the longest matching subword to the stream
+ */
+ public Lucene43DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet dictionary,
+ int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("dictionary cannot be null");
+ }
+ }
+
+ @Override
+ protected void decompose() {
+ final int len = termAtt.length();
+ for (int i=0;i<=len-this.minSubwordSize;++i) {
+ CompoundToken longestMatchToken=null;
+ for (int j=this.minSubwordSize;j<=this.maxSubwordSize;++j) {
+ if(i+j>len) {
+ break;
+ }
+ if(dictionary.contains(termAtt.buffer(), i, j)) {
+ if (this.onlyLongestMatch) {
+ if (longestMatchToken!=null) {
+ if (longestMatchToken.txt.length() < j) {
+ longestMatchToken = new CompoundToken(i, j);
+ }
+ } else {
+ longestMatchToken = new CompoundToken(i, j);
+ }
+ } else {
+ tokens.add(new CompoundToken(i, j));
+ }
+ }
+ }
+ if (this.onlyLongestMatch && longestMatchToken != null) {
+ tokens.add(longestMatchToken);
+ }
+ }
+ }
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43HyphenationCompoundWordTokenFilter.java
new file mode 100644
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/Lucene43HyphenationCompoundWordTokenFilter.java
+package org.apache.lucene.analysis.compound;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
+import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.xml.sax.InputSource;
+
+/**
+ * A {@link TokenFilter} that decomposes compound words found in many Germanic languages, using
+ * pre-4.4 behavior.
+ * @deprecated Use {@link HyphenationCompoundWordTokenFilter}.
+ */
+@Deprecated
+public class Lucene43HyphenationCompoundWordTokenFilter extends
+ Lucene43CompoundWordTokenFilterBase {
+ private HyphenationTree hyphenator;
+
+ /**
+ * Creates a new {@link Lucene43HyphenationCompoundWordTokenFilter} instance.
+ *
+ * @param input
+ * the {@link TokenStream} to process
+ * @param hyphenator
+ * the hyphenation pattern tree to use for hyphenation
+ * @param dictionary
+ * the word dictionary to match against.
+ */
+ public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary) {
+ this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
+ DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
+ }
+
+ /**
+ * Creates a new {@link Lucene43HyphenationCompoundWordTokenFilter} instance.
+ *
+ * @param input
+ * the {@link TokenStream} to process
+ * @param hyphenator
+ * the hyphenation pattern tree to use for hyphenation
+ * @param dictionary
+ * the word dictionary to match against.
+ * @param minWordSize
+ * only words longer than this get processed
+ * @param minSubwordSize
+ * only subwords longer than this get to the output stream
+ * @param maxSubwordSize
+ * only subwords shorter than this get to the output stream
+ * @param onlyLongestMatch
+ * Add only the longest matching subword to the stream
+ */
+ public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
+ int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
+ super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
+ onlyLongestMatch);
+ this.hyphenator = hyphenator;
+ }
+
+ /**
+ * Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ *
+ * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
+ * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+ * null, minWordSize, minSubwordSize, maxSubwordSize }
+ */
+ public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
+ int maxSubwordSize) {
+ this(input, hyphenator, null, minWordSize, minSubwordSize,
+ maxSubwordSize, false);
+ }
+
+ /**
+ * Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ *
+ * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, int, int, int)
+ * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+ * DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
+ */
+ public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
+ HyphenationTree hyphenator) {
+ this(input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
+ DEFAULT_MAX_SUBWORD_SIZE);
+ }
+
+ /**
+ * Create a hyphenator tree
+ *
+ * @param hyphenationFilename the filename of the XML grammar to load
+ * @return An object representing the hyphenation patterns
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public static HyphenationTree getHyphenationTree(String hyphenationFilename)
+ throws IOException {
+ return getHyphenationTree(new InputSource(hyphenationFilename));
+ }
+
+ /**
+ * Create a hyphenator tree
+ *
+ * @param hyphenationFile the file of the XML grammar to load
+ * @return An object representing the hyphenation patterns
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public static HyphenationTree getHyphenationTree(File hyphenationFile)
+ throws IOException {
+ return getHyphenationTree(new InputSource(hyphenationFile.toURI().toASCIIString()));
+ }
+
+ /**
+ * Create a hyphenator tree
+ *
+ * @param hyphenationSource the InputSource pointing to the XML grammar
+ * @return An object representing the hyphenation patterns
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+ throws IOException {
+ HyphenationTree tree = new HyphenationTree();
+ tree.loadPatterns(hyphenationSource);
+ return tree;
+ }
+
+ @Override
+ protected void decompose() {
+ // get the hyphenation points
+ Hyphenation hyphens = hyphenator.hyphenate(termAtt.buffer(), 0, termAtt.length(), 1, 1);
+ // No hyphen points found -> exit
+ if (hyphens == null) {
+ return;
+ }
+
+ final int[] hyp = hyphens.getHyphenationPoints();
+
+ for (int i = 0; i < hyp.length; ++i) {
+ int remaining = hyp.length - i;
+ int start = hyp[i];
+ CompoundToken longestMatchToken = null;
+ for (int j = 1; j < remaining; j++) {
+ int partLength = hyp[i + j] - start;
+
+ // if the part is longer than maxSubwordSize we
+ // are done with this round
+ if (partLength > this.maxSubwordSize) {
+ break;
+ }
+
+ // we only put subwords to the token stream
+ // that are longer than minPartSize
+ if (partLength < this.minSubwordSize) {
+ // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+ // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
+ continue;
+ }
+
+ // check the dictionary
+ if (dictionary == null || dictionary.contains(termAtt.buffer(), start, partLength)) {
+ if (this.onlyLongestMatch) {
+ if (longestMatchToken != null) {
+ if (longestMatchToken.txt.length() < partLength) {
+ longestMatchToken = new CompoundToken(start, partLength);
+ }
+ } else {
+ longestMatchToken = new CompoundToken(start, partLength);
+ }
+ } else {
+ tokens.add(new CompoundToken(start, partLength));
+ }
+ } else if (dictionary.contains(termAtt.buffer(), start, partLength - 1)) {
+ // check the dictionary again with a word that is one character
+ // shorter
+ // to avoid problems with genitive 's characters and other binding
+ // characters
+ if (this.onlyLongestMatch) {
+ if (longestMatchToken != null) {
+ if (longestMatchToken.txt.length() < partLength - 1) {
+ longestMatchToken = new CompoundToken(start, partLength - 1);
+ }
+ } else {
+ longestMatchToken = new CompoundToken(start, partLength - 1);
+ }
+ } else {
+ tokens.add(new CompoundToken(start, partLength - 1));
+ }
+ }
+ }
+ if (this.onlyLongestMatch && longestMatchToken!=null) {
+ tokens.add(longestMatchToken);
+ }
+ }
+ }
+}
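
A minimal usage sketch of the constructor and getHyphenationTree helpers added above (the grammar file name "hyph_de.xml" and the sample text are illustrative only, not part of this patch; the class is assumed to sit in org.apache.lucene.analysis.compound beside the 4.x filter):

    import java.io.File;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;

    // Load a FOP-compatible XML grammar once; the resulting tree can be
    // shared across token streams.
    HyphenationTree hyphenator =
        Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(new File("hyph_de.xml"));
    Tokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("Rindfleischetikettierung"));
    // With no dictionary, decompose() emits every hyphenation part whose
    // length falls between DEFAULT_MIN_SUBWORD_SIZE and DEFAULT_MAX_SUBWORD_SIZE.
    TokenStream result = new Lucene43HyphenationCompoundWordTokenFilter(source, hyphenator);
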
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
index 888930f16ac..6002ea99309 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Analyzer;
/**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
index 876a6160f73..9997d40155a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@@ -18,13 +18,11 @@ package org.apache.lucene.analysis.core;
*/
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
/**
* Emits the entire input as a single token.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
index c29bcd50992..8c5588626f6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
-import java.io.Reader;
import java.util.Map;
/**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
index e0437b3d467..5c0b6d2bcc9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.Version;
/**
* A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
@@ -30,41 +29,25 @@ import org.apache.lucene.util.Version;
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
*
- *
- * <p>
- * <a name="version"/>
- * You must specify the required {@link Version} compatibility when creating
- * {@link LetterTokenizer}:
- * <ul>
- * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
- * {@link CharTokenizer#normalize(int)} for details.</li>
- * </ul>
- * </p>
*/
public class LetterTokenizer extends CharTokenizer {
/**
* Construct a new LetterTokenizer.
- *
- * @param matchVersion
- * Lucene version to match See {@link <a href="#version">above</a>}
*/
- public LetterTokenizer(Version matchVersion) {
- super(matchVersion);
+ public LetterTokenizer() {
}
/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
- * @param matchVersion
- * Lucene version to match See {@link <a href="#version">above</a>}
* @param factory
* the attribute factory to use for this {@link Tokenizer}
*/
- public LetterTokenizer(Version matchVersion, AttributeFactory factory) {
- super(matchVersion, factory);
+ public LetterTokenizer(AttributeFactory factory) {
+ super(factory);
}
/** Collects only characters which satisfy
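
With the Version parameter gone, the tokenizer is consumed through the usual reset/incrementToken/end/close contract; a small sketch (the sample text is illustrative):

    import java.io.StringReader;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    LetterTokenizer tok = new LetterTokenizer();   // was: new LetterTokenizer(matchVersion)
    tok.setReader(new StringReader("It's 2014, already!"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);                    // prints: It, s, already
    }
    tok.end();
    tok.close();
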
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
index 4a06f3127d8..11dae66d2b2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
@@ -36,7 +36,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
/** Creates a new LetterTokenizerFactory */
public LetterTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -44,6 +43,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
@Override
public LetterTokenizer create(AttributeFactory factory) {
- return new LetterTokenizer(luceneMatchVersion, factory);
+ return new LetterTokenizer(factory);
}
}
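
Since assureMatchVersion() is gone, the factory no longer needs a luceneMatchVersion entry in its args map; an empty map now suffices (a sketch, using the default AttributeFactory):

    import java.util.HashMap;
    import org.apache.lucene.util.AttributeFactory;

    LetterTokenizerFactory factory =
        new LetterTokenizerFactory(new HashMap<String,String>()); // no luceneMatchVersion required
    LetterTokenizer tok = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
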
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
index 1b0ffa408f9..7a7e96898a9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@@ -23,30 +23,21 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Normalizes token text to lower case.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating LowerCaseFilter:
- * <ul>
- *   <li> As of 3.1, supplementary characters are properly lowercased.
- * </ul>
*/
public final class LowerCaseFilter extends TokenFilter {
- private final CharacterUtils charUtils;
+ private final CharacterUtils charUtils = CharacterUtils.getInstance();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new LowerCaseFilter, that normalizes token text to lower case.
*
- * @param matchVersion See above
* @param in TokenStream to filter
*/
- public LowerCaseFilter(Version matchVersion, TokenStream in) {
+ public LowerCaseFilter(TokenStream in) {
super(in);
- charUtils = CharacterUtils.getInstance(matchVersion);
}
@Override
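
Because CharacterUtils.getInstance() now always returns the supplementary-aware implementation (previously the Java 4 variant was used for matchVersion < 3.1), lowercasing operates on whole code points. A sketch using a supplementary character (Deseret capital long I):

    import org.apache.lucene.analysis.util.CharacterUtils;

    char[] buf = "\uD801\uDC00BC".toCharArray();        // U+10400 followed by "BC"
    CharacterUtils.getInstance().toLowerCase(buf, 0, buf.length);
    // buf now holds "\uD801\uDC28bc": the surrogate pair was lowercased as one
    // code point (U+10400 -> U+10428) rather than char by char.
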
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
index 244722efcc0..ded2966292b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
@@ -40,7 +40,6 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT
/** Creates a new LowerCaseFilterFactory */
public LowerCaseFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -48,7 +47,7 @@ public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiT
@Override
public LowerCaseFilter create(TokenStream input) {
- return new LowerCaseFilter(luceneMatchVersion,input);
+ return new LowerCaseFilter(input);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index d61e1a938d9..66586f77154 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -17,13 +17,8 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/**
* LowerCaseTokenizer performs the function of LetterTokenizer
@@ -35,41 +30,24 @@ import org.apache.lucene.util.Version;
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
*
- *
- * <p>
- * <a name="version"/>
- * You must specify the required {@link Version} compatibility when creating
- * {@link LowerCaseTokenizer}:
- * <ul>
- * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
- * {@link CharTokenizer#normalize(int)} for details.</li>
- * </ul>
- * </p>
*/
public final class LowerCaseTokenizer extends LetterTokenizer {
/**
* Construct a new LowerCaseTokenizer.
- *
- * @param matchVersion
- * Lucene version to match See {@link <a href="#version">above</a>}
- *
*/
- public LowerCaseTokenizer(Version matchVersion) {
- super(matchVersion);
+ public LowerCaseTokenizer() {
}
/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
- * @param matchVersion
- * Lucene version to match See {@link <a href="#version">above</a>}
* @param factory
* the attribute factory to use for this {@link Tokenizer}
*/
- public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory) {
- super(matchVersion, factory);
+ public LowerCaseTokenizer(AttributeFactory factory) {
+ super(factory);
}
/** Converts char to lower case
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
index 4af9a10484c..68b3c049722 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
@@ -39,7 +39,6 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
/** Creates a new LowerCaseTokenizerFactory */
public LowerCaseTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -47,7 +46,7 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
@Override
public LowerCaseTokenizer create(AttributeFactory factory) {
- return new LowerCaseTokenizer(luceneMatchVersion, factory);
+ return new LowerCaseTokenizer(factory);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index bc9a69b7f64..503b95ae72a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -17,38 +17,21 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.util.Version;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
* with {@link LowerCaseFilter}
- *
**/
public final class SimpleAnalyzer extends Analyzer {
- private final Version matchVersion;
-
/**
* Creates a new {@link SimpleAnalyzer}
- * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>}
*/
- public SimpleAnalyzer(Version matchVersion) {
- this.matchVersion = matchVersion;
+ public SimpleAnalyzer() {
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion));
+ return new TokenStreamComponents(new LowerCaseTokenizer());
}
}
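
Analyzer-level usage is unchanged apart from the constructor; a sketch of the standard tokenStream consumption pattern (field name and text are illustrative):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    Analyzer analyzer = new SimpleAnalyzer();    // was: new SimpleAnalyzer(matchVersion)
    try (TokenStream ts = analyzer.tokenStream("body", "The QUICK brown fox")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term);                // the, quick, brown, fox
      }
      ts.end();
    }
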
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index fe85bc82e7a..102618f84be 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -27,20 +27,10 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
-import org.apache.lucene.util.Version;
-
-/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating StopAnalyzer:
- * <ul>
- *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- *        supplementary characters in stopwords
- *   <li> As of 2.9, position increments are preserved
- * </ul>
-*/
+/**
+ * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+ */
public final class StopAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are not usually useful
@@ -55,40 +45,35 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
"that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
);
- final CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT,
- stopWords, false);
+ final CharArraySet stopSet = new CharArraySet(stopWords, false);
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
}
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS_SET}.
- * @param matchVersion See above
*/
- public StopAnalyzer(Version matchVersion) {
- this(matchVersion, ENGLISH_STOP_WORDS_SET);
+ public StopAnalyzer() {
+ this(ENGLISH_STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given set.
- * @param matchVersion See above
* @param stopWords Set of stop words */
- public StopAnalyzer(Version matchVersion, CharArraySet stopWords) {
- super(matchVersion, stopWords);
+ public StopAnalyzer(CharArraySet stopWords) {
+ super(stopWords);
}
/** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(Reader, Version)
- * @param matchVersion See above
+ * @see WordlistLoader#getWordSet(Reader)
* @param stopwordsFile File to load stop words from */
- public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
- this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion));
+ public StopAnalyzer(File stopwordsFile) throws IOException {
+ this(loadStopwordSet(stopwordsFile));
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader, Version)
- * @param matchVersion See above
+ * @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to load stop words from */
- public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, loadStopwordSet(stopwords, matchVersion));
+ public StopAnalyzer(Reader stopwords) throws IOException {
+ this(loadStopwordSet(stopwords));
}
/**
@@ -102,9 +87,8 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new LowerCaseTokenizer(matchVersion);
- return new TokenStreamComponents(source, new StopFilter(matchVersion,
- source, stopwords));
+ final Tokenizer source = new LowerCaseTokenizer();
+ return new TokenStreamComponents(source, new StopFilter(source, stopwords));
}
}
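
A sketch of building a StopAnalyzer with a custom stop set via the version-free makeStopSet overload from this patch (the stop words are arbitrary samples):

    import org.apache.lucene.analysis.util.CharArraySet;

    CharArraySet stops = StopFilter.makeStopSet("the", "a", "an");
    StopAnalyzer analyzer = new StopAnalyzer(stops);
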
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
index 536d253671b..2c3f000e25d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
@@ -24,19 +24,9 @@ import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/**
* Removes stop words from a token stream.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating StopFilter:
- * <ul>
- *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- *        supplementary characters in stopwords and position
- *        increments are preserved
- * </ul>
*/
public final class StopFilter extends FilteringTokenFilter {
@@ -47,17 +37,14 @@ public final class StopFilter extends FilteringTokenFilter {
* Constructs a filter which removes words from the input TokenStream that are
* named in the Set.
*
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the stop
- * set if Version > 3.0. See above for details.
* @param in
* Input stream
* @param stopWords
* A {@link CharArraySet} representing the stopwords.
- * @see #makeStopSet(Version, java.lang.String...)
+ * @see #makeStopSet(java.lang.String...)
*/
- public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) {
- super(matchVersion, in);
+ public StopFilter(TokenStream in, CharArraySet stopWords) {
+ super(in);
this.stopWords = stopWords;
}
@@ -67,12 +54,11 @@ public final class StopFilter extends FilteringTokenFilter {
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
*
- * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
* @param stopWords An array of stopwords
- * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+ * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
*/
- public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) {
- return makeStopSet(matchVersion, stopWords, false);
+ public static CharArraySet makeStopSet(String... stopWords) {
+ return makeStopSet(stopWords, false);
}
/**
@@ -81,38 +67,35 @@ public final class StopFilter extends FilteringTokenFilter {
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
*
- * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @return A Set ({@link CharArraySet}) containing the words
- * @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
+ * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
*/
- public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords) {
- return makeStopSet(matchVersion, stopWords, false);
+ public static CharArraySet makeStopSet(List<?> stopWords) {
+ return makeStopSet(stopWords, false);
}
/**
* Creates a stopword set from the given stopword array.
*
- * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
* @param stopWords An array of stopwords
* @param ignoreCase If true, all words are lower cased first.
* @return a Set containing the words
*/
- public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
- CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
+ public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
+ CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
}
/**
* Creates a stopword set from the given stopword list.
- * @param matchVersion Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @param ignoreCase if true, all words are lower cased first
* @return A Set ({@link CharArraySet}) containing the words
*/
- public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
- CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
+ public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase){
+ CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
stopSet.addAll(stopWords);
return stopSet;
}
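
The ignoreCase overloads work as before, just without the Version argument; a sketch wiring a StopFilter by hand (stop words and input text are arbitrary samples):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.util.CharArraySet;

    CharArraySet stops = StopFilter.makeStopSet(new String[] {"AND", "OR"}, true); // ignoreCase
    Tokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("cats AND dogs"));
    TokenStream ts = new StopFilter(new LowerCaseFilter(source), stops);  // keeps: cats, dogs
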
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
index 5e3c7e87fef..7bf32429b1f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@@ -81,7 +81,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
/** Creates a new StopFilterFactory */
public StopFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
stopWordFiles = get(args, "words");
format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
ignoreCase = getBoolean(args, "ignoreCase", false);
@@ -104,7 +103,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
if (null != format) {
throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
}
- stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+ stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
}
}
@@ -118,7 +117,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
@Override
public TokenStream create(TokenStream input) {
- StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
+ StopFilter stopFilter = new StopFilter(input,stopWords);
return stopFilter;
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
index 9c6bcbab744..d2791dfbf95 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
@@ -22,7 +22,6 @@ import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
-import org.apache.lucene.util.Version;
/**
* Removes tokens whose types appear in a set of blocked types from a token stream.
@@ -35,14 +34,13 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
/**
* Create a new {@link TypeTokenFilter}.
- * @param version the Lucene match version
* @param input the {@link TokenStream} to consume
* @param stopTypes the types to filter
* @param useWhiteList if true, then tokens whose type is in stopTypes will
* be kept, otherwise they will be filtered out
*/
- public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
- super(version, input);
+ public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
+ super(input);
this.stopTypes = stopTypes;
this.useWhiteList = useWhiteList;
}
@@ -50,10 +48,9 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
/**
* Create a new {@link TypeTokenFilter} that filters tokens out
* (useWhiteList=false).
- * @see #TypeTokenFilter(Version, TokenStream, Set, boolean)
*/
- public TypeTokenFilter(Version version, TokenStream input, Set<String> stopTypes) {
- this(version, input, stopTypes, false);
+ public TypeTokenFilter(TokenStream input, Set<String> stopTypes) {
+ this(input, stopTypes, false);
}
/**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
index 0545d754133..089ef7adb83 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
@@ -72,7 +72,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour
@Override
public TokenStream create(TokenStream input) {
- final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist);
+ final TokenStream filter = new TypeTokenFilter(input, stopTypes, useWhitelist);
return filter;
}
}
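
A sketch of the whitelist mode with types emitted by StandardTokenizer ("<NUM>" is one of StandardTokenizer's token type constants; the input text is illustrative):

    import java.io.StringReader;
    import java.util.Collections;
    import java.util.Set;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("lucene 5 rocks 2014"));
    Set<String> types = Collections.singleton("<NUM>");
    TokenStream onlyNumbers = new TypeTokenFilter(source, types, true); // keeps: 5, 2014
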
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
index 2625d4f5ebf..6fdae1b685a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
@@ -23,13 +23,9 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Normalizes token text to UPPER CASE.
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating UpperCaseFilter
*
* <p><b>NOTE:</b> In Unicode, this transformation may lose information when the
* upper case character represents more than one lower case character. Use this filter
@@ -37,18 +33,16 @@ import org.apache.lucene.util.Version;
* general search matching
*/
public final class UpperCaseFilter extends TokenFilter {
- private final CharacterUtils charUtils;
+ private final CharacterUtils charUtils = CharacterUtils.getInstance();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new UpperCaseFilter, that normalizes token text to upper case.
*
- * @param matchVersion See above
* @param in TokenStream to filter
*/
- public UpperCaseFilter(Version matchVersion, TokenStream in) {
+ public UpperCaseFilter(TokenStream in) {
super(in);
- charUtils = CharacterUtils.getInstance(matchVersion);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
index 60f1119405a..ac97ad7bd0a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilterFactory.java
@@ -45,7 +45,6 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT
/** Creates a new UpperCaseFilterFactory */
public UpperCaseFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -53,7 +52,7 @@ public class UpperCaseFilterFactory extends TokenFilterFactory implements MultiT
@Override
public UpperCaseFilter create(TokenStream input) {
- return new UpperCaseFilter(luceneMatchVersion,input);
+ return new UpperCaseFilter(input);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
index 2fdc3f3dcc3..855f4f6a88c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
@@ -17,38 +17,21 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.util.Version;
/**
* An Analyzer that uses {@link WhitespaceTokenizer}.
- *
**/
public final class WhitespaceAnalyzer extends Analyzer {
- private final Version matchVersion;
-
/**
* Creates a new {@link WhitespaceAnalyzer}
- * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>}
*/
- public WhitespaceAnalyzer(Version matchVersion) {
- this.matchVersion = matchVersion;
+ public WhitespaceAnalyzer() {
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion));
+ return new TokenStreamComponents(new WhitespaceTokenizer());
}
}
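
For reference, the behavioral difference between the two trivial analyzers, independent of this API change (sample input chosen for illustration):

    Analyzer ws = new WhitespaceAnalyzer();   // "Foo-Bar 42" -> [Foo-Bar] [42]
    Analyzer simple = new SimpleAnalyzer();   // "Foo-Bar 42" -> [foo] [bar]  (digits dropped)
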
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
index 354322c444d..f38b07aed64 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
@@ -17,50 +17,31 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/**
* A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
- * Adjacent sequences of non-Whitespace characters form tokens.
- * <a name="version"/>
- * <p>
- * You must specify the required {@link Version} compatibility when creating
- * {@link WhitespaceTokenizer}:
- * <ul>
- * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
- * {@link CharTokenizer#normalize(int)} for details.</li>
- * </ul>
+ * Adjacent sequences of non-Whitespace characters form tokens.
*/
public final class WhitespaceTokenizer extends CharTokenizer {
/**
- * Construct a new WhitespaceTokenizer. * @param matchVersion Lucene version
- * to match See {@link <a href="#version">above</a>}
- *
+ * Construct a new WhitespaceTokenizer.
*/
- public WhitespaceTokenizer(Version matchVersion) {
- super(matchVersion);
+ public WhitespaceTokenizer() {
}
/**
* Construct a new WhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
- * @param
- * matchVersion Lucene version to match See
- * {@link <a href="#version">above</a>}
* @param factory
* the attribute factory to use for this {@link Tokenizer}
*/
- public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory) {
- super(matchVersion, factory);
+ public WhitespaceTokenizer(AttributeFactory factory) {
+ super(factory);
}
/** Collects only characters which do not satisfy
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
index e23ee869665..708996362a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
-import java.io.Reader;
import java.util.Map;
/**
@@ -37,7 +36,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
/** Creates a new WhitespaceTokenizerFactory */
public WhitespaceTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -45,6 +43,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
@Override
public WhitespaceTokenizer create(AttributeFactory factory) {
- return new WhitespaceTokenizer(luceneMatchVersion, factory);
+ return new WhitespaceTokenizer(factory);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
index b54739be60e..e8f49ef3616 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import java.io.*;
import java.nio.charset.StandardCharsets;
@@ -61,7 +60,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#");
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -75,34 +74,30 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
- *
- * @param matchVersion Lucene version to match
*/
- public CzechAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+ public CzechAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion Lucene version to match
* @param stopwords a stopword set
*/
- public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public CzechAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words and a set of words to be
* excluded from the {@link CzechStemFilter}.
*
- * @param matchVersion Lucene version to match
* @param stopwords a stopword set
* @param stemExclusionTable a stemming exclusion set
*/
- public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) {
- super(matchVersion, stopwords);
- this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+ public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) {
+ super(stopwords);
+ this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
}
/**
@@ -115,16 +110,16 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
, and {@link CzechStemFilter}. If
* a stem exclusion set is provided via
- * {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a
+ * {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a
* {@link SetKeywordMarkerFilter} is added before
* {@link CzechStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter( matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!this.stemExclusionTable.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionTable);
result = new CzechStemFilter(result);
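
A sketch of the stem-exclusion constructor after the change (the excluded term "praha" is an arbitrary sample):

    import java.util.Arrays;
    import org.apache.lucene.analysis.util.CharArraySet;

    CharArraySet keep = new CharArraySet(Arrays.asList("praha"), false); // left unstemmed
    CzechAnalyzer analyzer = new CzechAnalyzer(CzechAnalyzer.getDefaultStopSet(), keep);
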
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
index 00f7520af9f..7f2720addc2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.DanishStemmer;
/**
@@ -64,7 +63,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public DanishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public DanishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public DanishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public DanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new DanishStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
index 6cab61ea1f4..f2d29b4385f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for German language.
@@ -48,7 +47,7 @@ import org.apache.lucene.util.Version;
* exclusion list is empty by default.
*
*
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.
*/
public final class GermanAnalyzer extends StopwordAnalyzerBase {
@@ -69,7 +68,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -91,35 +90,31 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
* Builds an analyzer with the default stop words:
* {@link #getDefaultStopSet()}.
*/
- public GermanAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+ public GermanAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public GermanAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a stemming exclusion set
*/
- public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+ public GermanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -135,10 +130,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter( matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
result = new SetKeywordMarkerFilter(result, exclusionSet);
result = new GermanNormalizationFilter(result);
result = new GermanLightStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
index 4f418ee183f..c80c27200b8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for the Greek language.
@@ -38,7 +37,7 @@ import org.apache.lucene.util.Version;
* A default set of stopwords is used unless an alternative list is specified.
*
*
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.
*/
public final class GreekAnalyzer extends StopwordAnalyzerBase {
@@ -69,10 +68,9 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words.
- * @param matchVersion Lucene compatibility version
*/
- public GreekAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+ public GreekAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_SET);
}
/**
@@ -81,11 +79,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
* NOTE: The stopwords set should be pre-processed with the logic of
* {@link GreekLowerCaseFilter} for best results.
*
- * @param matchVersion Lucene compatibility version
* @param stopwords a stopword set
*/
- public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) {
- super(matchVersion, stopwords);
+ public GreekAnalyzer(CharArraySet stopwords) {
+ super(stopwords);
}
/**
@@ -100,10 +97,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
- result = new StandardFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new GreekLowerCaseFilter(source);
+ result = new StandardFilter(result);
+ result = new StopFilter(result, stopwords);
result = new GreekStemFilter(result);
return new TokenStreamComponents(source, result);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
index ba0a20ac29e..66d4aa6a602 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
@@ -22,32 +22,22 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Normalizes token text to lower case, removes some Greek diacritics,
* and standardizes final sigma to sigma.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating GreekLowerCaseFilter:
- * <ul>
- *   <li> As of 3.1, supplementary characters are properly lowercased.
- * </ul>
*/
public final class GreekLowerCaseFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final CharacterUtils charUtils;
+ private final CharacterUtils charUtils = CharacterUtils.getInstance();
/**
* Create a GreekLowerCaseFilter that normalizes Greek token text.
*
- * @param matchVersion Lucene compatibility version,
- * See above
* @param in TokenStream to filter
*/
- public GreekLowerCaseFilter(Version matchVersion, TokenStream in) {
+ public GreekLowerCaseFilter(TokenStream in) {
super(in);
- this.charUtils = CharacterUtils.getInstance(matchVersion);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
index 15b6f9251c1..5ff0c90f63e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilterFactory.java
@@ -40,7 +40,6 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M
/** Creates a new GreekLowerCaseFilterFactory */
public GreekLowerCaseFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -48,7 +47,7 @@ public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements M
@Override
public GreekLowerCaseFilter create(TokenStream in) {
- return new GreekLowerCaseFilter(luceneMatchVersion, in);
+ return new GreekLowerCaseFilter(in);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
index f714e54c3d0..750bd3589ae 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
@@ -1,7 +1,6 @@
package org.apache.lucene.analysis.el;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import java.util.Arrays;
@@ -205,7 +204,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc4 = new CharArraySet(
Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"),
false);
@@ -231,7 +230,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc6 = new CharArraySet(
Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ",
"αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ",
"μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ",
@@ -256,7 +255,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc7 = new CharArraySet(
Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ",
"πεθ", "πικρ", "ποτ", "σιχ", "χ"),
false);
@@ -283,11 +282,11 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc8a = new CharArraySet(
Arrays.asList("τρ", "τσ"),
false);
- private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc8b = new CharArraySet(
Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ",
"καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ",
"π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ",
@@ -346,7 +345,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc9 = new CharArraySet(
Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ",
"βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ",
"σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"),
@@ -434,11 +433,11 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc12a = new CharArraySet(
Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"),
false);
- private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc12b = new CharArraySet(
Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"),
false);
@@ -458,7 +457,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc13 = new CharArraySet(
Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"),
false);
@@ -492,7 +491,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc14 = new CharArraySet(
Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ",
"λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ",
"ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε",
@@ -530,7 +529,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc15a = new CharArraySet(
Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ",
"αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ",
"ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ",
@@ -539,7 +538,7 @@ public class GreekStemmer {
"ουλαμ", "ουρ", "π", "τρ", "μ"),
false);
- private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc15b = new CharArraySet(
Arrays.asList("ψοφ", "ναυλοχ"),
false);
@@ -576,7 +575,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc16 = new CharArraySet(
Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"),
false);
@@ -596,7 +595,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc17 = new CharArraySet(
Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"),
false);
@@ -610,7 +609,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc18 = new CharArraySet(
Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"),
false);
@@ -634,7 +633,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT,
+ private static final CharArraySet exc19 = new CharArraySet(
Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"),
false);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
index 934540a3ec9..15bfb51a518 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for English.
@@ -57,18 +56,17 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
*/
- public EnglishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public EnglishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public EnglishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -76,14 +74,12 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -101,11 +97,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new EnglishPossessiveFilter(matchVersion, result);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new EnglishPossessiveFilter(result);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new PorterStemFilter(result);
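The same constructor migration applies to every per-language analyzer touched by this patch. A hedged sketch of caller code after the change (the class name and the sample exclusion term are illustrative, not part of the patch):

import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

public class AnalyzerConstructionSketch {
  public static void main(String[] args) throws Exception {
    // Default stop words, no Version argument:
    Analyzer plain = new EnglishAnalyzer();
    // Custom stop words plus a stem-exclusion set:
    Analyzer custom = new EnglishAnalyzer(
        EnglishAnalyzer.getDefaultStopSet(),
        new CharArraySet(Arrays.asList("flying"), true)); // "flying" is illustrative
    plain.close();
    custom.close();
  }
}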
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
index 9f6f21884d2..e4e03a1c1b3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* TokenFilter that removes possessives (trailing 's) from words.
@@ -30,8 +29,7 @@ import org.apache.lucene.util.Version;
public final class EnglishPossessiveFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- // NOTE: version now unused
- public EnglishPossessiveFilter(Version version, TokenStream input) {
+ public EnglishPossessiveFilter(TokenStream input) {
super(input);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
index f1685a7941b..40f1d30751d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilterFactory.java
@@ -39,7 +39,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
/** Creates a new EnglishPossessiveFilterFactory */
public EnglishPossessiveFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -47,6 +46,6 @@ public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
@Override
public TokenStream create(TokenStream input) {
- return new EnglishPossessiveFilter(luceneMatchVersion, input);
+ return new EnglishPossessiveFilter(input);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
index cdb397b93bf..b4d68a5f797 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
@@ -64,7 +64,6 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
* <p>Copyright: Copyright 2008, Lucid Imagination, Inc.
* <p>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu)
*/
-import org.apache.lucene.util.Version;
/**
* This class implements the Kstem algorithm
@@ -280,7 +279,7 @@ public class KStemmer {
DictEntry defaultEntry;
DictEntry entry;
- CharArrayMap<DictEntry> d = new CharArrayMap<>(Version.LUCENE_CURRENT, 1000, false);
+ CharArrayMap<DictEntry> d = new CharArrayMap<>(1000, false);
for (int i = 0; i < exceptionWords.length; i++) {
if (!d.containsKey(exceptionWords[i])) {
entry = new DictEntry(exceptionWords[i], true);
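For reference, a small sketch (not part of the patch) of the version-free CharArrayMap constructor used above, with an illustrative value type:

import org.apache.lucene.analysis.util.CharArrayMap;

public class CharArrayMapSketch {
  public static void main(String[] args) {
    // (initialCapacity, ignoreCase) -- the Version parameter is gone
    CharArrayMap<Integer> d = new CharArrayMap<>(1000, false);
    d.put("aide", 1);
    // Lookups can reuse a char[] slice, avoiding a String allocation:
    char[] buf = {'a', 'i', 'd', 'e', 's'};
    System.out.println(d.containsKey(buf, 0, 4)); // true: matches "aide"
  }
}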
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
index 2ce1965af61..3c2812bbd2d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Spanish.
@@ -63,7 +62,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -75,18 +74,17 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SpanishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public SpanishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public SpanishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -94,14 +92,12 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -118,10 +114,10 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SpanishLightStemFilter(result);
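WordlistLoader.getSnowballWordSet likewise drops its Version parameter, as the static initializer above shows. A minimal sketch (not part of the patch) of loading a Snowball-format list; the sample words are illustrative:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;

public class SnowballStopSetSketch {
  public static void main(String[] args) throws IOException {
    // Snowball lists allow "|" comments, which the loader strips:
    CharArraySet stops = WordlistLoader.getSnowballWordSet(
        new StringReader("de | article\nla\n"));
    System.out.println(stops.contains("la")); // true
  }
}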
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
index 12bb7a3ef5d..4222e5a0998 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
@@ -73,18 +72,17 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public BasqueAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public BasqueAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public BasqueAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -92,14 +90,12 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +112,10 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new BasqueStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index 1f1b4b2bdb0..df9c2fb5c96 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Persian.
@@ -87,20 +86,18 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public PersianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public PersianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){
- super(matchVersion, stopwords);
+ public PersianAnalyzer(CharArraySet stopwords){
+ super(stopwords);
}
/**
@@ -115,8 +112,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
result = new ArabicNormalizationFilter(result);
/* additional persian-specific normalization */
result = new PersianNormalizationFilter(result);
@@ -124,7 +121,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* the order here is important: the stopword list is normalized with the
* above!
*/
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
/**
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
index 5f824429772..84a3c4ffd5f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;
/**
@@ -64,7 +63,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public FinnishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public FinnishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public FinnishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public FinnishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new FinnishStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
index b86fb80cb86..f0acba32e48 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -49,7 +48,7 @@ import java.util.Arrays;
* exclusion list is empty by default.
*
*
- * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.
*/
public final class FrenchAnalyzer extends StopwordAnalyzerBase {
@@ -59,7 +58,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/** Default set of articles for ElisionFilter */
public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
+ new CharArraySet(Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true));
/**
@@ -80,7 +79,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -92,37 +91,33 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
*/
- public FrenchAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public FrenchAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public FrenchAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclutionSet
* a stemming exclusion set
*/
- public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords,
+ public FrenchAnalyzer(CharArraySet stopwords,
CharArraySet stemExclutionSet) {
- super(matchVersion, stopwords);
+ super(stopwords);
this.excltable = CharArraySet.unmodifiableSet(CharArraySet
- .copy(matchVersion, stemExclutionSet));
+ .copy(stemExclutionSet));
}
/**
@@ -139,11 +134,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
result = new FrenchLightStemFilter(result);
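Custom analyzers follow the same pattern: createComponents no longer threads matchVersion through the chain. A hedged sketch of a user-defined analyzer after this change (the class name and the minimal filter chain are illustrative):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public final class VersionFreeAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // Each component is constructed directly, without a Version argument:
    Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    return new TokenStreamComponents(source, result);
  }
}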
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
index 089e123845b..00413d55cf1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;
/**
@@ -45,7 +44,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"d", "m", "b"
), true));
@@ -56,7 +55,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
* with phrase queries versus tAthair (which would not have a gap).
*/
private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"h", "n", "t"
), true));
@@ -91,18 +90,17 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public IrishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public IrishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public IrishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -110,14 +108,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public IrishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -134,12 +130,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, HYPHENATIONS);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new StopFilter(result, HYPHENATIONS);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new IrishLowerCaseFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new IrishStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
index a40276ff6de..b79245ba15e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Galician.
@@ -62,7 +61,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public GalicianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public GalicianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public GalicianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -117,10 +113,10 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new GalicianStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
index 1edd0e8030e..4ee31f13ddb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
-import org.apache.lucene.util.Version;
/**
* Analyzer for Hindi.
@@ -75,32 +74,29 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a stemming exclusion set
*/
- public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(version, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(
- CharArraySet.copy(matchVersion, stemExclusionSet));
+ public HindiAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
* Builds an analyzer with the given stop words
*
- * @param version lucene compatibility version
* @param stopwords a stopword set
*/
- public HindiAnalyzer(Version version, CharArraySet stopwords) {
- this(version, stopwords, CharArraySet.EMPTY_SET);
+ public HindiAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
- public HindiAnalyzer(Version version) {
- this(version, DefaultSetHolder.DEFAULT_STOP_SET);
+ public HindiAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
@@ -117,13 +113,13 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new LowerCaseFilter(source);
if (!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new IndicNormalizationFilter(result);
result = new HindiNormalizationFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new StopFilter(result, stopwords);
result = new HindiStemFilter(result);
return new TokenStreamComponents(source, result);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
index d2addb81747..8784e3bbb31 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.HungarianStemmer;
/**
@@ -64,7 +63,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public HungarianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public HungarianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public HungarianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public HungarianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new HungarianStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index ef4b26d2212..c5306148788 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -28,7 +28,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
@@ -215,7 +214,7 @@ final class Stemmer {
if (stems.size() < 2) {
return stems;
}
- CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+ CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
List<CharsRef> deduped = new ArrayList<>();
for (CharsRef s : stems) {
if (!terms.contains(s)) {
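The dedup idiom above works the same outside the stemmer; a standalone sketch (illustrative names and data) using the version-free CharArraySet:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.util.CharArraySet;

public class DedupSketch {
  public static void main(String[] args) {
    CharArraySet terms = new CharArraySet(8, true); // ignoreCase = true here
    List<String> deduped = new ArrayList<>();
    for (String s : Arrays.asList("Stem", "stem", "walk")) {
      if (!terms.contains(s)) {
        deduped.add(s);
        terms.add(s);
      }
    }
    System.out.println(deduped); // [Stem, walk]
  }
}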
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
index 0f5065954ad..ae22c47d8b4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
@@ -73,18 +72,17 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ArmenianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ArmenianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public ArmenianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -92,14 +90,12 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +112,10 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new ArmenianStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
index 85bd081e7a4..d54b3609597 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
/**
* Analyzer for Indonesian (Bahasa)
@@ -69,20 +68,18 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public IndonesianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public IndonesianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public IndonesianAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -90,17 +87,14 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* {@link IndonesianStemFilter}.
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
- public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public IndonesianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -116,10 +110,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if (!stemExclusionSet.isEmpty()) {
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index 382bfaef9c8..afae44def4c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Italian.
@@ -48,7 +47,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
- new CharArraySet(Version.LUCENE_CURRENT,
+ new CharArraySet(
Arrays.asList(
"c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
"gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
@@ -72,7 +71,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -84,18 +83,17 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public ItalianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ItalianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public ItalianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -103,14 +101,12 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -127,11 +123,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new ItalianLightStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
index c6b80ed756b..0d858428cac 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Latvian.
@@ -62,7 +61,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,18 +73,17 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public LatvianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public LatvianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public LatvianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -93,14 +91,12 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -117,10 +113,10 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new LatvianStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
index 4c9743caf2b..986994e5121 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
@@ -82,7 +82,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
Set<String> k = getSet(args, KEEP);
if (k != null) {
- keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+ keep = new CharArraySet(10, ignoreCase);
keep.addAll(k);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
index 5f501e06cb2..8e1726fb5ce 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* Removes words that are too long or too short from the stream.
@@ -39,13 +38,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter {
* Create a new {@link CodepointCountFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link Character#codePointCount(char[], int, int)}
* < min) or too long ({@link Character#codePointCount(char[], int, int)} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
- public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
- super(version, in);
+ public CodepointCountFilter(TokenStream in, int min, int max) {
+ super(in);
if (min < 0) {
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
index 54250641d32..d42d7f6452f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilterFactory.java
@@ -50,6 +50,6 @@ public class CodepointCountFilterFactory extends TokenFilterFactory {
@Override
public CodepointCountFilter create(TokenStream input) {
- return new CodepointCountFilter(luceneMatchVersion, input, min, max);
+ return new CodepointCountFilter(input, min, max);
}
}
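The distinction between the two filters is worth noting: LengthFilter counts UTF-16 code units while CodepointCountFilter counts Unicode code points, so they disagree on supplementary characters. A small plain-Java illustration:

public class CodepointVsLengthSketch {
  public static void main(String[] args) {
    String emoji = "\uD83D\uDE00"; // U+1F600, outside the BMP
    System.out.println(emoji.length());                          // 2 code units (LengthFilter's measure)
    System.out.println(emoji.codePointCount(0, emoji.length())); // 1 code point (CodepointCountFilter's measure)
  }
}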
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
index c77e3a7614b..093d22b60b9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
/**
* A TokenFilter that only keeps tokens with text contained in the
@@ -37,12 +36,11 @@ public final class KeepWordFilter extends FilteringTokenFilter {
* Create a new {@link KeepWordFilter}.
* <p><b>NOTE</b>: The words set passed to this constructor will be directly
* used by this filter and should not be modified.
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param words the words to keep
*/
- public KeepWordFilter(Version version, TokenStream in, CharArraySet words) {
- super(version, in);
+ public KeepWordFilter(TokenStream in, CharArraySet words) {
+ super(in);
this.words = words;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
index 78c831bc8ce..7d4c24e0968 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
@@ -44,7 +44,6 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
/** Creates a new KeepWordFilterFactory */
public KeepWordFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
wordFiles = get(args, "words");
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
@@ -73,7 +72,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
if (words == null) {
return input;
} else {
- final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words);
+ final TokenStream filter = new KeepWordFilter(input, words);
return filter;
}
}
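A hedged end-to-end sketch (not part of the patch) of the version-free KeepWordFilter in a token chain; the sample text and kept words are illustrative:

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class KeepWordSketch {
  public static void main(String[] args) throws Exception {
    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("keep this drop that"));
    CharArraySet words = new CharArraySet(Arrays.asList("keep", "this"), true);
    TokenStream ts = new KeepWordFilter(source, words);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term); // prints "keep", then "this"
    }
    ts.end();
    ts.close();
  }
}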
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
index f35afc68b6f..bd7e2232023 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
/**
* Removes words that are too long or too short from the stream.
@@ -39,13 +38,12 @@ public final class LengthFilter extends FilteringTokenFilter {
* Create a new {@link LengthFilter}. This will filter out tokens whose
* {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()}
* < min) or too long ({@link CharTermAttribute#length()} > max).
- * @param version the Lucene match version
* @param in the {@link TokenStream} to consume
* @param min the minimum length
* @param max the maximum length
*/
- public LengthFilter(Version version, TokenStream in, int min, int max) {
- super(version, in);
+ public LengthFilter(TokenStream in, int min, int max) {
+ super(in);
if (min < 0) {
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
index 6d63623e0fb..476f37543ea 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilterFactory.java
@@ -50,7 +50,7 @@ public class LengthFilterFactory extends TokenFilterFactory {
@Override
public LengthFilter create(TokenStream input) {
- final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max);
+ final LengthFilter filter = new LengthFilter(input,min,max);
return filter;
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
index e3c7a033bdb..a1785abcf73 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -34,8 +33,7 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
- // use a fixed version, as we don't care about case sensitivity.
- private final CharArraySet previous = new CharArraySet(Version.LUCENE_CURRENT, 8, false);
+ private final CharArraySet previous = new CharArraySet(8, false);
/**
* Creates a new RemoveDuplicatesTokenFilter
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
index 6dadf820933..20803202c77 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
@@ -20,15 +20,11 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.Version;
import java.io.IOException;
/**
* Trims leading and trailing whitespace from Tokens in the stream.
- * <p>As of Lucene 4.4, this filter does not support updateOffsets=true anymore
- * as it can lead to broken token streams.
*/
public final class TrimFilter extends TokenFilter {
@@ -36,10 +32,9 @@ public final class TrimFilter extends TokenFilter {
/**
* Create a new {@link TrimFilter}.
- * @param version the Lucene match version
* @param in the stream to consume
*/
- public TrimFilter(Version version, TokenStream in) {
+ public TrimFilter(TokenStream in) {
super(in);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
index c21233119cd..58c400b9bd6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilterFactory.java
@@ -47,7 +47,7 @@ public class TrimFilterFactory extends TokenFilterFactory {
@Override
public TrimFilter create(TokenStream input) {
- final TrimFilter filter = new TrimFilter(luceneMatchVersion, input);
+ final TrimFilter filter = new TrimFilter(input);
return filter;
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
index f93c0a7e773..e158910035a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
@@ -30,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Arrays;
@@ -206,11 +205,8 @@ public final class WordDelimiterFilter extends TokenFilter {
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
- public WordDelimiterFilter(Version matchVersion, TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
+ public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords) {
super(in);
- if (!matchVersion.onOrAfter(Version.LUCENE_4_8)) {
- throw new IllegalArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
- }
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(
@@ -225,8 +221,8 @@ public final class WordDelimiterFilter extends TokenFilter {
* @param configurationFlags Flags configuring the filter
* @param protWords If not null is the set of tokens to protect from being delimited
*/
- public WordDelimiterFilter(Version matchVersion, TokenStream in, int configurationFlags, CharArraySet protWords) {
- this(matchVersion, in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
+ public WordDelimiterFilter(TokenStream in, int configurationFlags, CharArraySet protWords) {
+ this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords);
}
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
index c5c478da771..780b68e1e9a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
@@ -119,7 +119,7 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re
@Override
public TokenFilter create(TokenStream input) {
if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8)) {
- return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
+ return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
flags, protectedWords);
} else {
return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
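The filter itself still takes its behavior from the flags bitset; only the version argument disappears from the 4.8+ branch above. A sketch of direct construction under that assumption; the flag selection, input text, and expected output are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class WordDelimiterDemo {
      public static void main(String[] args) throws Exception {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("PowerShot500"));
        int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
            | WordDelimiterFilter.GENERATE_NUMBER_PARTS
            | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
            | WordDelimiterFilter.SPLIT_ON_NUMERICS;
        // protWords == null: no tokens are protected from delimiting.
        TokenStream stream = new WordDelimiterFilter(tokenizer, flags, null);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.println(term.toString()); // expected: "Power", "Shot", "500"
        }
        stream.end();
        stream.close();
      }
    }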
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
index f4647249d08..7e4a063fdba 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramFilterFactory.java
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.ngram;
*/
import java.util.Map;
+
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
@@ -46,7 +49,10 @@ public class EdgeNGramFilterFactory extends TokenFilterFactory {
}
@Override
- public EdgeNGramTokenFilter create(TokenStream input) {
- return new EdgeNGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
+ public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
+ }
+ return new Lucene43EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 20fda83c449..219d4ca8d15 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Tokenizes the given token into n-grams of given size(s).
@@ -59,18 +58,13 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
+ public EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
super(input);
- if (version == null) {
- throw new IllegalArgumentException("version must not be null");
- }
-
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
@@ -79,9 +73,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
- this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
- ? CharacterUtils.getInstance(version)
- : CharacterUtils.getJava4Instance();
+ this.charUtils = CharacterUtils.getInstance();
this.minGram = minGram;
this.maxGram = maxGram;
}
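With the null check and the version switch gone, the filter unconditionally uses the current CharacterUtils and the 4.4 semantics. A minimal sketch of the resulting front-edge grams; KeywordTokenizer (which never took a Version) passes the whole input through as a single token, and the demo class name and sizes are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class EdgeNGramFilterDemo {
      public static void main(String[] args) throws Exception {
        KeywordTokenizer tokenizer = new KeywordTokenizer(); // emits the input as one token
        tokenizer.setReader(new StringReader("lucene"));
        TokenStream stream = new EdgeNGramTokenFilter(tokenizer, 1, 3);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.println(term.toString()); // expected: "l", "lu", "luc"
        }
        stream.end();
        stream.close();
      }
    }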
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
index 8b4d42fc88d..9e277abcd63 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.ngram;
* limitations under the License.
*/
-import java.io.Reader;
-
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
@@ -38,24 +36,22 @@ public class EdgeNGramTokenizer extends NGramTokenizer {
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenizer(Version version, int minGram, int maxGram) {
- super(version, minGram, maxGram, true);
+ public EdgeNGramTokenizer(int minGram, int maxGram) {
+ super(minGram, maxGram, true);
}
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param version the Lucene match version
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public EdgeNGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram) {
- super(version, factory, minGram, maxGram, true);
+ public EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
+ super(factory, minGram, maxGram, true);
}
}
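Unlike the token filter, the tokenizer derives grams directly from the character stream. A sketch of the two-argument constructor after this change; the sample text and gram sizes are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class EdgeNGramTokenizerDemo {
      public static void main(String[] args) throws Exception {
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(2, 4); // no Version argument
        tokenizer.setReader(new StringReader("lucene"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
          System.out.println(term.toString()); // expected: "lu", "luc", "luce"
        }
        tokenizer.end();
        tokenizer.close();
      }
    }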
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
index 2990513f597..9772d3c98fe 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java
@@ -17,8 +17,10 @@ package org.apache.lucene.analysis.ngram;
* limitations under the License.
*/
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.Version;
import java.io.Reader;
import java.util.Map;
@@ -47,7 +49,10 @@ public class EdgeNGramTokenizerFactory extends TokenizerFactory {
}
@Override
- public EdgeNGramTokenizer create(AttributeFactory factory) {
- return new EdgeNGramTokenizer(luceneMatchVersion, factory, minGramSize, maxGramSize);
+ public Tokenizer create(AttributeFactory factory) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new EdgeNGramTokenizer(factory, minGramSize, maxGramSize);
+ }
+ return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java
new file mode 100644
index 00000000000..d465ce9e1a1
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenFilter.java
@@ -0,0 +1,126 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
+
+import java.io.IOException;
+
+/**
+ * Tokenizes the given token into n-grams of given size(s), using pre-4.4 behavior.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter}.
+ */
+@Deprecated
+public final class Lucene43EdgeNGramTokenFilter extends TokenFilter {
+ public static final int DEFAULT_MAX_GRAM_SIZE = 1;
+ public static final int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ private final CharacterUtils charUtils;
+ private final int minGram;
+ private final int maxGram;
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curCodePointCount;
+ private int curGramSize;
+ private int tokStart;
+ private int tokEnd; // only used if the length changed before this filter
+ private int savePosIncr;
+ private int savePosLen;
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+
+ /**
+ * Creates Lucene43EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+ *
+ * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ super(input);
+
+ if (minGram < 1) {
+ throw new IllegalArgumentException("minGram must be greater than zero");
+ }
+
+ if (minGram > maxGram) {
+ throw new IllegalArgumentException("minGram must not be greater than maxGram");
+ }
+
+ this.charUtils = CharacterUtils.getJava4Instance();
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ while (true) {
+ if (curTermBuffer == null) {
+ if (!input.incrementToken()) {
+ return false;
+ } else {
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
+ curCodePointCount = charUtils.codePointCount(termAtt);
+ curGramSize = minGram;
+ tokStart = offsetAtt.startOffset();
+ tokEnd = offsetAtt.endOffset();
+ savePosIncr += posIncrAtt.getPositionIncrement();
+ savePosLen = posLenAtt.getPositionLength();
+ }
+ }
+ if (curGramSize <= maxGram) { // stop once curGramSize exceeds the largest requested gram size
+ if (curGramSize <= curCodePointCount) { // the token must still have at least curGramSize code points
+ // grab gramSize chars from front or back
+ clearAttributes();
+ offsetAtt.setOffset(tokStart, tokEnd);
+ // first ngram gets increment, others don't
+ if (curGramSize == minGram) {
+ posIncrAtt.setPositionIncrement(savePosIncr);
+ savePosIncr = 0;
+ } else {
+ posIncrAtt.setPositionIncrement(0);
+ }
+ posLenAtt.setPositionLength(savePosLen);
+ final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, 0, charLength);
+ curGramSize++;
+ return true;
+ }
+ }
+ curTermBuffer = null;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ curTermBuffer = null;
+ savePosIncr = 0;
+ }
+}
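Nothing selects this class implicitly; it is reachable only through the factory's version check shown earlier or by explicit construction, for example by callers pinned to pre-4.4 index semantics. A hedged sketch; the helper class and method names are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenFilter;

    public class LegacyEdgeNGrams {
      /** Builds the pre-4.4 edge n-gram stream for callers pinned to old index semantics. */
      @SuppressWarnings("deprecation")
      public static TokenStream legacyEdgeGrams(String text, int minGram, int maxGram) {
        KeywordTokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader(text));
        return new Lucene43EdgeNGramTokenFilter(tokenizer, minGram, maxGram);
      }
    }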
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
new file mode 100644
index 00000000000..5bb12d402cc
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Tokenizes the input from an edge into n-grams of given size(s), using pre-4.4 behavior.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.ngram.EdgeNGramTokenizer}.
+ */
+@Deprecated
+public class Lucene43EdgeNGramTokenizer extends Lucene43NGramTokenizer {
+ public static final int DEFAULT_MAX_GRAM_SIZE = 1;
+ public static final int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ /**
+ * Creates Lucene43EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ *
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenizer(int minGram, int maxGram) {
+ super(minGram, maxGram);
+ }
+
+ /**
+ * Creates Lucene43EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ *
+ * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43EdgeNGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
+ super(factory, minGram, maxGram);
+ }
+
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java
new file mode 100644
index 00000000000..1205fb34fb2
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenFilter.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.analysis.ngram;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
+
+import java.io.IOException;
+
+/**
+ * Tokenizes the input into n-grams of the given size(s), matching the behavior of Lucene 4.3 and earlier.
+ *
+ * @deprecated Use {@link org.apache.lucene.analysis.ngram.NGramTokenFilter} instead.
+ */
+@Deprecated
+public final class Lucene43NGramTokenFilter extends TokenFilter {
+ public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
+ public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+ private final int minGram, maxGram;
+
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curCodePointCount;
+ private int curGramSize;
+ private int curPos;
+ private int curPosInc, curPosLen;
+ private int tokStart;
+ private int tokEnd;
+ private boolean hasIllegalOffsets; // only if the length changed before this filter
+
+ private final CharacterUtils charUtils;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncAtt;
+ private final PositionLengthAttribute posLenAtt;
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+ /**
+ * Creates Lucene43NGramTokenFilter with given min and max n-grams.
+ * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
+ * @param minGram the smallest n-gram to generate
+ * @param maxGram the largest n-gram to generate
+ */
+ public Lucene43NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
+ this.charUtils = CharacterUtils.getJava4Instance();
+ if (minGram < 1) {
+ throw new IllegalArgumentException("minGram must be greater than zero");
+ }
+ if (minGram > maxGram) {
+ throw new IllegalArgumentException("minGram must not be greater than maxGram");
+ }
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+
+ posIncAtt = new PositionIncrementAttribute() {
+ @Override
+ public void setPositionIncrement(int positionIncrement) {}
+ @Override
+ public int getPositionIncrement() {
+ return 0;
+ }
+ };
+ posLenAtt = new PositionLengthAttribute() {
+ @Override
+ public void setPositionLength(int positionLength) {}
+ @Override
+ public int getPositionLength() {
+ return 0;
+ }
+ };
+ }
+
+ /**
+ * Creates Lucene43NGramTokenFilter with default min and max n-grams.
+ * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
+ */
+ public Lucene43NGramTokenFilter(TokenStream input) {
+ this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ }
+
+ /** Returns the next token in the stream, or null at EOS. */
+ @Override
+ public final boolean incrementToken() throws IOException {
+ while (true) {
+ if (curTermBuffer == null) {
+ if (!input.incrementToken()) {
+ return false;
+ } else {
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
+ curCodePointCount = charUtils.codePointCount(termAtt);
+ curGramSize = minGram;
+ curPos = 0;
+ curPosInc = posIncAtt.getPositionIncrement();
+ curPosLen = posLenAtt.getPositionLength();
+ tokStart = offsetAtt.startOffset();
+ tokEnd = offsetAtt.endOffset();
+ // if length by start + end offsets doesn't match the term text then assume
+ // this is a synonym and don't adjust the offsets.
+ hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
+ }
+ }
+
+ while (curGramSize <= maxGram) {
+ while (curPos+curGramSize <= curTermLength) { // while there is input
+ clearAttributes();
+ termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
+ if (hasIllegalOffsets) {
+ offsetAtt.setOffset(tokStart, tokEnd);
+ } else {
+ offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
+ }
+ curPos++;
+ return true;
+ }
+ curGramSize++; // increase n-gram size
+ curPos = 0;
+ }
+ curTermBuffer = null;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ curTermBuffer = null;
+ }
+}
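The observable difference from the rewritten NGramTokenFilter is ordering and positions: this legacy loop runs the gram size on the outside, so "abc" with sizes 1 to 3 comes out as a, b, c, ab, bc, abc, whereas the new filter emits a, ab, abc, b, bc, c. A sketch that prints the legacy order; the demo class name and input are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.Lucene43NGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class LegacyNGramOrder {
      @SuppressWarnings("deprecation")
      public static void main(String[] args) throws Exception {
        KeywordTokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader("abc"));
        TokenStream stream = new Lucene43NGramTokenFilter(tokenizer, 1, 3);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.print(term.toString() + " "); // expected: a b c ab bc abc
        }
        stream.end();
        stream.close();
      }
    }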
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
index fa9fcb0caec..8cde3e40c6b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
@@ -29,7 +29,7 @@ import org.apache.lucene.util.AttributeFactory;
* Old broken version of {@link NGramTokenizer}.
*/
@Deprecated
-public final class Lucene43NGramTokenizer extends Tokenizer {
+public class Lucene43NGramTokenizer extends Tokenizer {
public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
index 60398bdf4b2..70e802b5bc1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramFilterFactory.java
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.ngram;
*/
import java.util.Map;
+
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;
/**
* Factory for {@link NGramTokenFilter}.
@@ -46,7 +49,10 @@ public class NGramFilterFactory extends TokenFilterFactory {
}
@Override
- public NGramTokenFilter create(TokenStream input) {
- return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
+ public TokenFilter create(TokenStream input) {
+ if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
+ return new NGramTokenFilter(input, minGramSize, maxGramSize);
+ }
+ return new Lucene43NGramTokenFilter(input, minGramSize, maxGramSize);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index ba87146a44f..83b19e6e561 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -27,21 +27,18 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
/**
* Tokenizes the input into n-grams of the given size(s).
- *
- * <p>You must specify the required {@link Version} compatibility when
- * creating a {@link NGramTokenFilter}. As of Lucene 4.4, this token filters:<ul>
+ * As of Lucene 4.4, this token filter:<ul>
* <li>handles supplementary characters correctly,
* <li>emits all n-grams for the same token at the same position,
* <li>does not modify offsets,
* <li>sorts n-grams by their offset in the original token first, then
* increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
* "c").</ul>
- * <p>You can make this filter use the old behavior by providing a version <
- * {@link Version#LUCENE_4_4} in the constructor but this is not recommended as
+ * <p>You can make this filter use the old behavior by using
+ * {@link org.apache.lucene.analysis.ngram.Lucene43NGramTokenFilter} but this is not recommended as
* it will lead to broken {@link TokenStream}s that will cause highlighting
* bugs.
* <p>If you were using this {@link TokenFilter} to perform partial highlighting,
@@ -65,7 +62,6 @@ public final class NGramTokenFilter extends TokenFilter {
private int tokEnd;
private boolean hasIllegalOffsets; // only if the length changed before this filter
- private final Version version;
private final CharacterUtils charUtils;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAtt;
@@ -74,18 +70,13 @@ public final class NGramTokenFilter extends TokenFilter {
/**
* Creates NGramTokenFilter with given min and max n-grams.
- * @param version Lucene version to enable correct position increments.
- * See above for details.
* @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
- super(new CodepointCountFilter(version, input, minGram, Integer.MAX_VALUE));
- this.version = version;
- this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
- ? CharacterUtils.getInstance(version)
- : CharacterUtils.getJava4Instance();
+ public NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
+ super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
+ this.charUtils = CharacterUtils.getInstance();
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
@@ -94,37 +85,17 @@ public final class NGramTokenFilter extends TokenFilter {
}
this.minGram = minGram;
this.maxGram = maxGram;
- if (version.onOrAfter(Version.LUCENE_4_4)) {
- posIncAtt = addAttribute(PositionIncrementAttribute.class);
- posLenAtt = addAttribute(PositionLengthAttribute.class);
- } else {
- posIncAtt = new PositionIncrementAttribute() {
- @Override
- public void setPositionIncrement(int positionIncrement) {}
- @Override
- public int getPositionIncrement() {
- return 0;
- }
- };
- posLenAtt = new PositionLengthAttribute() {
- @Override
- public void setPositionLength(int positionLength) {}
- @Override
- public int getPositionLength() {
- return 0;
- }
- };
- }
+
+ posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ posLenAtt = addAttribute(PositionLengthAttribute.class);
}
/**
* Creates NGramTokenFilter with default min and max n-grams.
- * @param version Lucene version to enable correct position increments.
- * See above for details.
* @param input {@link TokenStream} holding the input to be tokenized
*/
- public NGramTokenFilter(Version version, TokenStream input) {
- this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ public NGramTokenFilter(TokenStream input) {
+ this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
}
/** Returns the next token in the stream, or null at EOS. */
@@ -149,39 +120,22 @@ public final class NGramTokenFilter extends TokenFilter {
hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
}
}
- if (version.onOrAfter(Version.LUCENE_4_4)) {
- if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
- ++curPos;
- curGramSize = minGram;
- }
- if ((curPos + curGramSize) <= curCodePointCount) {
- clearAttributes();
- final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
- final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
- termAtt.copyBuffer(curTermBuffer, start, end - start);
- posIncAtt.setPositionIncrement(curPosInc);
- curPosInc = 0;
- posLenAtt.setPositionLength(curPosLen);
- offsetAtt.setOffset(tokStart, tokEnd);
- curGramSize++;
- return true;
- }
- } else {
- while (curGramSize <= maxGram) {
- while (curPos+curGramSize <= curTermLength) { // while there is input
- clearAttributes();
- termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
- if (hasIllegalOffsets) {
- offsetAtt.setOffset(tokStart, tokEnd);
- } else {
- offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
- }
- curPos++;
- return true;
- }
- curGramSize++; // increase n-gram size
- curPos = 0;
- }
+
+ if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
+ ++curPos;
+ curGramSize = minGram;
+ }
+ if ((curPos + curGramSize) <= curCodePointCount) {
+ clearAttributes();
+ final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
+ final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, start, end - start);
+ posIncAtt.setPositionIncrement(curPosInc);
+ curPosInc = 0;
+ posLenAtt.setPositionLength(curPosLen);
+ offsetAtt.setOffset(tokStart, tokEnd);
+ curGramSize++;
+ return true;
}
curTermBuffer = null;
}
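After this cleanup the 4.4 algorithm is the only code path: grams are ordered by start offset and then length, offsets are left untouched, and the saved position increment is spent on the first gram of each token. A sketch of the resulting stream; the sizes and sample text are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.ngram.NGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class NGramOrderDemo {
      public static void main(String[] args) throws Exception {
        KeywordTokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader("abc"));
        TokenStream stream = new NGramTokenFilter(tokenizer, 1, 2);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
          System.out.print(term.toString() + " "); // expected: a ab b bc c
        }
        stream.end();
        stream.close();
      }
    }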
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index 72c943b1ef9..177e46733fe 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ngram;
*/
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -27,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.Version;
/**
* Tokenizes the input into n-grams of the given size(s).
@@ -78,51 +76,43 @@ public class NGramTokenizer extends Tokenizer {
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- NGramTokenizer(Version version, int minGram, int maxGram, boolean edgesOnly) {
- init(version, minGram, maxGram, edgesOnly);
+ NGramTokenizer(int minGram, int maxGram, boolean edgesOnly) {
+ init(minGram, maxGram, edgesOnly);
}
/**
* Creates NGramTokenizer with given min and max n-grams.
- * @param version the lucene compatibility version
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public NGramTokenizer(Version version, int minGram, int maxGram) {
- this(version, minGram, maxGram, false);
+ public NGramTokenizer(int minGram, int maxGram) {
+ this(minGram, maxGram, false);
}
- NGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram, boolean edgesOnly) {
+ NGramTokenizer(AttributeFactory factory, int minGram, int maxGram, boolean edgesOnly) {
super(factory);
- init(version, minGram, maxGram, edgesOnly);
+ init(minGram, maxGram, edgesOnly);
}
/**
* Creates NGramTokenizer with given min and max n-grams.
- * @param version the lucene compatibility version
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
- public NGramTokenizer(Version version, AttributeFactory factory, int minGram, int maxGram) {
- this(version, factory, minGram, maxGram, false);
+ public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
+ this(factory, minGram, maxGram, false);
}
/**
* Creates NGramTokenizer with default min and max n-grams.
- * @param version the lucene compatibility version
*/
- public NGramTokenizer(Version version) {
- this(version, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
+ public NGramTokenizer() {
+ this(DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
}
- private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
- if (!edgesOnly && !version.onOrAfter(Version.LUCENE_4_4)) {
- throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer");
- }
- charUtils = version.onOrAfter(Version.LUCENE_4_4)
- ? CharacterUtils.getInstance(version)
- : CharacterUtils.getJava4Instance();
+ private void init(int minGram, int maxGram, boolean edgesOnly) {
+ charUtils = CharacterUtils.getInstance();
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
index 7aa4a502cdb..de9a010db58 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java
@@ -53,7 +53,7 @@ public class NGramTokenizerFactory extends TokenizerFactory {
@Override
public Tokenizer create(AttributeFactory factory) {
if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4)) {
- return new NGramTokenizer(luceneMatchVersion, factory, minGramSize, maxGramSize);
+ return new NGramTokenizer(factory, minGramSize, maxGramSize);
} else {
return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index 1f29184429d..e3b2389f542 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -28,13 +28,11 @@ import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -50,10 +48,8 @@ import java.nio.charset.StandardCharsets;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
*
- *
- * <p>NOTE: This class uses the same {@link Version}
- * dependent settings as {@link StandardAnalyzer}.
*/
+// TODO: extend StopwordAnalyzerBase
public final class DutchAnalyzer extends Analyzer {
/** File containing default Dutch stopwords. */
@@ -73,14 +69,14 @@ public final class DutchAnalyzer extends Analyzer {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
throw new RuntimeException("Unable to load default stopword set");
}
- DEFAULT_STEM_DICT = new CharArrayMap<>(Version.LUCENE_CURRENT, 4, false);
+ DEFAULT_STEM_DICT = new CharArrayMap<>(4, false);
DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
DEFAULT_STEM_DICT.put("ei", "eier");
@@ -100,29 +96,27 @@ public final class DutchAnalyzer extends Analyzer {
private CharArraySet excltable = CharArraySet.EMPTY_SET;
private final StemmerOverrideMap stemdict;
- private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()})
* and a few default entries for the stem exclusion table.
*
*/
- public DutchAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
+ public DutchAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
- public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
+ public DutchAnalyzer(CharArraySet stopwords){
+ this(stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
- public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
- this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
+ public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable){
+ this(stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
}
- public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
- this.matchVersion = matchVersion;
- this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
- this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+ public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
+ this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+ this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
if (stemOverrideDict.isEmpty()) {
this.stemdict = null;
} else {
@@ -154,10 +148,10 @@ public final class DutchAnalyzer extends Analyzer {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stoptable);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stoptable);
if (!excltable.isEmpty())
result = new SetKeywordMarkerFilter(result, excltable);
if (stemdict != null)
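With the matchVersion field and parameter gone, the analyzer is configured entirely by its argument list, and the common case is the no-argument form. A minimal usage sketch; the field name and sample text are illustrative, and the stream protocol (reset, incrementToken, end, close) is unchanged.

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.nl.DutchAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class DutchAnalyzerDemo {
      public static void main(String[] args) throws IOException {
        Analyzer analyzer = new DutchAnalyzer(); // default stop words and stem dictionary
        try (TokenStream stream = analyzer.tokenStream("body", "Dit zijn fietsen")) {
          CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            System.out.println(term.toString());
          }
          stream.end();
        }
      }
    }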
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index ffe519947d0..0dd81255964 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.NorwegianStemmer;
/**
@@ -64,7 +63,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public NorwegianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public NorwegianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public NorwegianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new NorwegianStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index 3bceb5c6ab3..fde61d6fa8c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Portuguese.
@@ -63,7 +62,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -75,18 +74,17 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public PortugueseAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public PortugueseAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public PortugueseAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -94,14 +92,12 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public PortugueseAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -118,10 +114,10 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new PortugueseLightStemFilter(result);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
index f8ad153cfb4..54ecdff15f2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@@ -31,7 +31,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
import static org.apache.lucene.analysis.util.StemmerUtil.*;
@@ -135,8 +134,7 @@ public abstract class RSLPStemmerBase {
if (!exceptions[i].endsWith(suffix))
throw new RuntimeException("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
}
- this.exceptions = new CharArraySet(Version.LUCENE_CURRENT,
- Arrays.asList(exceptions), false);
+ this.exceptions = new CharArraySet(Arrays.asList(exceptions), false);
}
@Override
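The version-free CharArraySet constructor used here applies anywhere such a set is built inline. A tiny sketch; the two entries are illustrative, not taken from the RSLP rule files.

    import java.util.Arrays;

    import org.apache.lucene.analysis.util.CharArraySet;

    public class CharArraySetDemo {
      public static void main(String[] args) {
        // ignoreCase == false: lookups are case-sensitive, as in the stemmer exceptions above.
        CharArraySet exceptions = new CharArraySet(Arrays.asList("mens", "lápis"), false);
        System.out.println(exceptions.contains("mens")); // true
        System.out.println(exceptions.contains("MENS")); // false
      }
    }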
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
index 8a4b8aa52ad..995ae2db893 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@@ -31,7 +31,6 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.Version;
/**
* An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection
@@ -50,23 +49,20 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
//The default maximum percentage (40%) of index documents which
//can contain a term, after which the term is considered to be a stop word.
public static final float defaultMaxDocFreqPercent = 0.4f;
- private final Version matchVersion;
/**
* Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
* indexed fields from terms with a document frequency percentage greater than
* {@link #defaultMaxDocFreqPercent}
*
- * @param matchVersion Version to be used in {@link StopFilter}
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
- Version matchVersion,
Analyzer delegate,
IndexReader indexReader) throws IOException {
- this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent);
+ this(delegate, indexReader, defaultMaxDocFreqPercent);
}
/**
@@ -74,18 +70,16 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* indexed fields from terms with a document frequency greater than the given
* maxDocFreq
*
- * @param matchVersion Version to be used in {@link StopFilter}
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param maxDocFreq Document frequency terms should be above in order to be stopwords
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
- Version matchVersion,
Analyzer delegate,
IndexReader indexReader,
int maxDocFreq) throws IOException {
- this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq);
+ this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq);
}
/**
@@ -93,7 +87,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* indexed fields from terms with a document frequency percentage greater than
* the given maxPercentDocs
*
- * @param matchVersion Version to be used in {@link StopFilter}
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
* can contain a term, after which the term is considered to be a stop word
@@ -101,11 +94,10 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
- Version matchVersion,
Analyzer delegate,
IndexReader indexReader,
float maxPercentDocs) throws IOException {
- this(matchVersion, delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs);
+ this(delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs);
}
/**
@@ -113,7 +105,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* given selection of fields from terms with a document frequency percentage
* greater than the given maxPercentDocs
*
- * @param matchVersion Version to be used in {@link StopFilter}
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param fields Selection of fields to calculate stopwords for
@@ -122,12 +113,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
- Version matchVersion,
Analyzer delegate,
IndexReader indexReader,
Collection<String> fields,
float maxPercentDocs) throws IOException {
- this(matchVersion, delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
+ this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
}
/**
@@ -135,7 +125,6 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* given selection of fields from terms with a document frequency greater than
* the given maxDocFreq
*
- * @param matchVersion Version to be used in {@link StopFilter}
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param fields Selection of fields to calculate stopwords for
@@ -143,13 +132,11 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
- Version matchVersion,
Analyzer delegate,
IndexReader indexReader,
Collection<String> fields,
int maxDocFreq) throws IOException {
super(delegate.getReuseStrategy());
- this.matchVersion = matchVersion;
this.delegate = delegate;
for (String field : fields) {
@@ -181,8 +168,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
if (stopWords == null) {
return components;
}
- StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(),
- new CharArraySet(matchVersion, stopWords, false));
+ StopFilter stopFilter = new StopFilter(components.getTokenStream(),
+ new CharArraySet(stopWords, false));
return new TokenStreamComponents(components.getTokenizer(), stopFilter);
}
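Wrapping a delegate analyzer now needs only the reader and a threshold; obtaining an open IndexReader is outside this patch's scope. A hedged sketch of the simplest overload, using the class's own default threshold; the helper class and method names are illustrative.

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
    import org.apache.lucene.index.IndexReader;

    public class StopWordWrapping {
      /** Treats any term occurring in more than 40% of documents as a stop word. */
      public static Analyzer protect(Analyzer delegate, IndexReader reader) throws IOException {
        return new QueryAutoStopWordAnalyzer(
            delegate, reader, QueryAutoStopWordAnalyzer.defaultMaxDocFreqPercent);
      }
    }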
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
index e729786cfe7..c9dee414442 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.reverse;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -36,7 +35,6 @@ public final class ReverseStringFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final char marker;
- private final Version matchVersion;
private static final char NOMARKER = '\uFFFF';
/**
@@ -66,11 +64,10 @@ public final class ReverseStringFilter extends TokenFilter {
* The reversed tokens will not be marked.
*
*
- * @param matchVersion Lucene compatibility version
* @param in {@link TokenStream} to filter
*/
- public ReverseStringFilter(Version matchVersion, TokenStream in) {
- this(matchVersion, in, NOMARKER);
+ public ReverseStringFilter(TokenStream in) {
+ this(in, NOMARKER);
}
/**
@@ -81,13 +78,11 @@ public final class ReverseStringFilter extends TokenFilter {
* character.
*
*
- * @param matchVersion compatibility version
* @param in {@link TokenStream} to filter
* @param marker A character used to mark reversed tokens
*/
- public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
+ public ReverseStringFilter(TokenStream in, char marker) {
super(in);
- this.matchVersion = matchVersion;
this.marker = marker;
}
@@ -100,7 +95,7 @@ public final class ReverseStringFilter extends TokenFilter {
termAtt.resizeBuffer(len);
termAtt.buffer()[len - 1] = marker;
}
- reverse( matchVersion, termAtt.buffer(), 0, len );
+ reverse( termAtt.buffer(), 0, len );
termAtt.setLength(len);
return true;
} else {
@@ -111,48 +106,43 @@ public final class ReverseStringFilter extends TokenFilter {
/**
* Reverses the given input string
*
- * @param matchVersion compatibility version
* @param input the string to reverse
* @return the given input string in reversed order
*/
- public static String reverse( Version matchVersion, final String input ){
+ public static String reverse(final String input ){
final char[] charInput = input.toCharArray();
- reverse( matchVersion, charInput, 0, charInput.length );
+ reverse( charInput, 0, charInput.length );
return new String( charInput );
}
/**
* Reverses the given input buffer in-place
- * @param matchVersion compatibility version
* @param buffer the input char array to reverse
*/
- public static void reverse(Version matchVersion, final char[] buffer) {
- reverse(matchVersion, buffer, 0, buffer.length);
+ public static void reverse(final char[] buffer) {
+ reverse(buffer, 0, buffer.length);
}
/**
* Partially reverses the given input buffer in-place from offset 0
* up to the given length.
- * @param matchVersion compatibility version
* @param buffer the input char array to reverse
* @param len the length in the buffer up to where the
* buffer should be reversed
*/
- public static void reverse(Version matchVersion, final char[] buffer,
- final int len) {
- reverse( matchVersion, buffer, 0, len );
+ public static void reverse(final char[] buffer, final int len) {
+ reverse( buffer, 0, len );
}
/**
* Partially reverses the given input buffer in-place from the given offset
* up to the given length.
- * @param matchVersion compatibility version
* @param buffer the input char array to reverse
* @param start the offset from where to reverse the buffer
* @param len the length in the buffer up to where the
* buffer should be reversed
*/
- public static void reverse(Version matchVersion, final char[] buffer,
+ public static void reverse(final char[] buffer,
final int start, final int len) {
/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
if (len < 2)
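The static reverse() helpers lose the leading Version argument but keep their semantics. For illustration (input values are arbitrary):

    import org.apache.lucene.analysis.reverse.ReverseStringFilter;

    String whole = ReverseStringFilter.reverse("lucene");  // "enecul"

    char[] buf = "abcdef".toCharArray();
    ReverseStringFilter.reverse(buf, 2, 3);  // reverses three chars from offset 2: "abedcf"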
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
index f25831ad198..33cfc97fb5d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
@@ -40,7 +40,6 @@ public class ReverseStringFilterFactory extends TokenFilterFactory {
/** Creates a new ReverseStringFilterFactory */
public ReverseStringFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -48,7 +47,7 @@ public class ReverseStringFilterFactory extends TokenFilterFactory {
@Override
public ReverseStringFilter create(TokenStream in) {
- return new ReverseStringFilter(luceneMatchVersion,in);
+ return new ReverseStringFilter(in);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index 22af94ec177..cca18c6ecc5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.RomanianStemmer;
/**
@@ -78,18 +77,17 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public RomanianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public RomanianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public RomanianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -97,14 +95,12 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -121,10 +117,10 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new RomanianStemmer());
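The migration pattern is the same for every language analyzer in this patch: drop the leading Version argument, keep the rest of the signature. A before/after sketch using RomanianAnalyzer; the stem-exclusion content is hypothetical:

    import java.util.Arrays;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.ro.RomanianAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;

    static Analyzer buildRomanian() {
      // words in this set are marked as keywords and skipped by the stemmer
      CharArraySet noStem = new CharArraySet(Arrays.asList("bucuresti"), false);
      // 4.x: new RomanianAnalyzer(Version.LUCENE_4_9, RomanianAnalyzer.getDefaultStopSet(), noStem)
      return new RomanianAnalyzer(RomanianAnalyzer.getDefaultStopSet(), noStem);
    }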
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index 69ab96fa679..7dd1406aebf 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Russian language.
@@ -54,7 +53,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -74,34 +73,30 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
return DefaultSetHolder.DEFAULT_STOP_SET;
}
- public RussianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public RussianAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
*/
- public RussianAnalyzer(Version matchVersion, CharArraySet stopwords){
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public RussianAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
- * @param matchVersion
- * lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet a set of words not to be stemmed
*/
- public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+ public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -117,10 +112,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if (!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
index 9f7cf319012..cd2e3353f38 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.shingle;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
/**
* A ShingleAnalyzerWrapper wraps a {@link ShingleFilter} around another {@link Analyzer}.
@@ -101,15 +100,15 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
/**
* Wraps {@link StandardAnalyzer}.
*/
- public ShingleAnalyzerWrapper(Version matchVersion) {
- this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+ public ShingleAnalyzerWrapper() {
+ this(ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
}
/**
* Wraps {@link StandardAnalyzer}.
*/
- public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) {
- this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize);
+ public ShingleAnalyzerWrapper(int minShingleSize, int maxShingleSize) {
+ this(new StandardAnalyzer(), minShingleSize, maxShingleSize);
}
/**
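After the change the convenience constructors mirror ShingleFilter's defaults directly; a one-line sketch of the remaining two-argument form:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;

    // wraps a default StandardAnalyzer and emits 2- and 3-token shingles
    Analyzer shingles = new ShingleAnalyzerWrapper(2, 3);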
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index f7927161726..9663bfacaed 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -17,16 +17,14 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
-import org.apache.lucene.util.Version;
-import java.io.File;
import java.io.IOException;
import java.io.Reader;
@@ -34,18 +32,6 @@ import java.io.Reader;
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}, using a list of
* English stop words.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ClassicAnalyzer:
- * <ul>
- *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- *        supplementary characters in stopwords
- *   <li> As of 2.9, StopFilter preserves position
- *        increments
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see LUCENE-1068)
- * </ul>
*
* ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
* As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
@@ -63,29 +49,23 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
* @param stopWords stop words */
- public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) {
- super(matchVersion, stopWords);
+ public ClassicAnalyzer(CharArraySet stopWords) {
+ super(stopWords);
}
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
*/
- public ClassicAnalyzer(Version matchVersion) {
- this(matchVersion, STOP_WORDS_SET);
+ public ClassicAnalyzer() {
+ this(STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader, Version)
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
+ * @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to read stop words from */
- public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, loadStopwordSet(stopwords, matchVersion));
+ public ClassicAnalyzer(Reader stopwords) throws IOException {
+ this(loadStopwordSet(stopwords));
}
/**
@@ -107,11 +87,11 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- final ClassicTokenizer src = new ClassicTokenizer(matchVersion);
+ final ClassicTokenizer src = new ClassicTokenizer();
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new ClassicFilter(src);
- tok = new LowerCaseFilter(matchVersion, tok);
- tok = new StopFilter(matchVersion, tok, stopwords);
+ tok = new LowerCaseFilter(tok);
+ tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) throws IOException {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
index eb085894788..118a41cb8b0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
@@ -18,7 +18,6 @@
package org.apache.lucene.analysis.standard;
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -26,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
@@ -102,19 +99,19 @@ public final class ClassicTokenizer extends Tokenizer {
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
- public ClassicTokenizer(Version matchVersion) {
- init(matchVersion);
+ public ClassicTokenizer() {
+ init();
}
/**
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
*/
- public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
+ public ClassicTokenizer(AttributeFactory factory) {
super(factory);
- init(matchVersion);
+ init();
}
- private void init(Version matchVersion) {
+ private void init() {
this.scanner = new ClassicTokenizerImpl(input);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
index 3d73bd7d506..e4d901b82ba 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
@@ -37,7 +37,6 @@ public class ClassicTokenizerFactory extends TokenizerFactory {
/** Creates a new ClassicTokenizerFactory */
public ClassicTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -46,7 +45,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory {
@Override
public ClassicTokenizer create(AttributeFactory factory) {
- ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory);
+ ClassicTokenizer tokenizer = new ClassicTokenizer(factory);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index 00604afc17e..db9c4719dc0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -17,16 +17,14 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
-import org.apache.lucene.util.Version;
-import java.io.File;
import java.io.IOException;
import java.io.Reader;
@@ -34,26 +32,9 @@ import java.io.Reader;
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}, using a list of
* English stop words.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating StandardAnalyzer:
- * <ul>
- *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- *        from their combining characters. If you use a previous version number,
- *        you get the exact broken behavior for backwards compatibility.
- *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
- *        and StopFilter correctly handles Unicode 4.0 supplementary characters
- *        in stopwords. {@link ClassicTokenizer} and {@link ClassicAnalyzer}
- *        are the pre-3.1 implementations of StandardTokenizer and
- *        StandardAnalyzer.
- *   <li> As of 2.9, StopFilter preserves position increments
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see LUCENE-1068)
- * </ul>
*/
public final class StandardAnalyzer extends StopwordAnalyzerBase {
-
+
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
@@ -64,29 +45,22 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
* @param stopWords stop words */
- public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) {
- super(matchVersion, stopWords);
+ public StandardAnalyzer(CharArraySet stopWords) {
+ super(stopWords);
}
- /** Builds an analyzer with the default stop words ({@link
- * #STOP_WORDS_SET}).
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
+ /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
*/
- public StandardAnalyzer(Version matchVersion) {
- this(matchVersion, STOP_WORDS_SET);
+ public StandardAnalyzer() {
+ this(STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader, Version)
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
+ * @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to read stop words from */
- public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, loadStopwordSet(stopwords, matchVersion));
+ public StandardAnalyzer(Reader stopwords) throws IOException {
+ this(loadStopwordSet(stopwords));
}
/**
@@ -108,11 +82,11 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- final StandardTokenizer src = new StandardTokenizer(matchVersion);
+ final StandardTokenizer src = new StandardTokenizer();
src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new StandardFilter(matchVersion, src);
- tok = new LowerCaseFilter(matchVersion, tok);
- tok = new StopFilter(matchVersion, tok, stopwords);
+ TokenStream tok = new StandardFilter(src);
+ tok = new LowerCaseFilter(tok);
+ tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) throws IOException {
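Since StandardAnalyzer is the constructor most user code touches, a short consumption sketch of the no-argument form; the field name and sample text are arbitrary:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    static void printTokens(String text) throws IOException {
      try (StandardAnalyzer analyzer = new StandardAnalyzer();
           TokenStream ts = analyzer.tokenStream("body", text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                      // required before the first incrementToken()
        while (ts.incrementToken()) {
          System.out.println(term.toString());
        }
        ts.end();
      }
    }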
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
index 809f9653dfe..ae5be75bc1e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@@ -21,14 +21,13 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.Version;
/**
* Normalizes tokens extracted with {@link StandardTokenizer}.
*/
public class StandardFilter extends TokenFilter {
- public StandardFilter(Version matchVersion, TokenStream in) {
+ public StandardFilter(TokenStream in) {
super(in);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
index f2dd7e0507f..f9102b00b44 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
@@ -38,7 +38,6 @@ public class StandardFilterFactory extends TokenFilterFactory {
/** Creates a new StandardFilterFactory */
public StandardFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -46,6 +45,6 @@ public class StandardFilterFactory extends TokenFilterFactory {
@Override
public StandardFilter create(TokenStream input) {
- return new StandardFilter(luceneMatchVersion, input);
+ return new StandardFilter(input);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
index 196c0ca1baf..bcfb6f6f267 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
@@ -18,7 +18,6 @@
package org.apache.lucene.analysis.standard;
import java.io.IOException;
-import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -26,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex.
*
@@ -116,19 +113,19 @@ public final class StandardTokenizer extends Tokenizer {
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
- public StandardTokenizer(Version matchVersion) {
- init(matchVersion);
+ public StandardTokenizer() {
+ init();
}
/**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
*/
- public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
+ public StandardTokenizer(AttributeFactory factory) {
super(factory);
- init(matchVersion);
+ init();
}
- private void init(Version matchVersion) {
+ private void init() {
this.scanner = new StandardTokenizerImpl(input);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
index bb5248b947b..87709aa8622 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
@@ -37,7 +37,6 @@ public class StandardTokenizerFactory extends TokenizerFactory {
/** Creates a new StandardTokenizerFactory */
public StandardTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -46,7 +45,7 @@ public class StandardTokenizerFactory extends TokenizerFactory {
@Override
public StandardTokenizer create(AttributeFactory factory) {
- StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory);
+ StandardTokenizer tokenizer = new StandardTokenizer(factory);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
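The factories now validate their argument map without a luceneMatchVersion entry. A sketch of driving the factory directly (schema machinery normally does this); note the constructor consumes the keys it recognizes and rejects leftovers:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
    import org.apache.lucene.util.AttributeFactory;

    static Tokenizer makeTokenizer() {
      Map<String,String> args = new HashMap<>();
      args.put("maxTokenLength", "255");
      StandardTokenizerFactory factory = new StandardTokenizerFactory(args);
      return factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
    }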
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index 59cfbd16ec7..53ffac224b9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@@ -34,15 +33,9 @@ import java.io.Reader;
* {@link org.apache.lucene.analysis.core.LowerCaseFilter} and
* {@link org.apache.lucene.analysis.core.StopFilter}, using a list of
* English stop words.
- *
- * <a name="version"/>
- * <p>
- * You must specify the required {@link org.apache.lucene.util.Version}
- * compatibility when creating UAX29URLEmailAnalyzer
- * </p>
*/
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
-
+
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -53,29 +46,23 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
* @param stopWords stop words */
- public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) {
- super(matchVersion, stopWords);
+ public UAX29URLEmailAnalyzer(CharArraySet stopWords) {
+ super(stopWords);
}
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
*/
- public UAX29URLEmailAnalyzer(Version matchVersion) {
- this(matchVersion, STOP_WORDS_SET);
+ public UAX29URLEmailAnalyzer() {
+ this(STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given reader.
- * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version)
- * @param matchVersion Lucene version to match See {@link
- * <a href="#version">above</a>}
+ * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader)
* @param stopwords Reader to read stop words from */
- public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, loadStopwordSet(stopwords, matchVersion));
+ public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException {
+ this(loadStopwordSet(stopwords));
}
/**
@@ -97,11 +84,11 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
- final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion);
+ final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer();
src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new StandardFilter(matchVersion, src);
- tok = new LowerCaseFilter(matchVersion, tok);
- tok = new StopFilter(matchVersion, tok, stopwords);
+ TokenStream tok = new StandardFilter(src);
+ tok = new LowerCaseFilter(tok);
+ tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) throws IOException {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index cd1218d8da7..522276b5b5f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -18,9 +18,6 @@ package org.apache.lucene.analysis.standard;
*/
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -28,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/**
* This class implements Word Break rules from the Unicode Text Segmentation
@@ -100,19 +95,19 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
* the input to the newly created JFlex scanner.
*/
- public UAX29URLEmailTokenizer(Version matchVersion) {
- this.scanner = getScannerFor(matchVersion);
+ public UAX29URLEmailTokenizer() {
+ this.scanner = getScanner();
}
/**
* Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory}
*/
- public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory) {
+ public UAX29URLEmailTokenizer(AttributeFactory factory) {
super(factory);
- this.scanner = getScannerFor(matchVersion);
+ this.scanner = getScanner();
}
- private StandardTokenizerInterface getScannerFor(Version matchVersion) {
+ private StandardTokenizerInterface getScanner() {
return new UAX29URLEmailTokenizerImpl(input);
}
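A sketch of the tokenizer's no-argument form; the sample text exercises the URL and email token types this class adds on top of UAX#29 segmentation:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

    static void demo() throws IOException {
      UAX29URLEmailTokenizer tok = new UAX29URLEmailTokenizer();
      tok.setReader(new StringReader("see http://lucene.apache.org or mail dev@lucene.apache.org"));
      CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
      TypeAttribute type = tok.addAttribute(TypeAttribute.class);
      tok.reset();
      while (tok.incrementToken()) {
        // prints entries such as "<URL>: http://lucene.apache.org"
        System.out.println(type.type() + ": " + term);
      }
      tok.end();
      tok.close();
    }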
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
index e1218075aea..485b7d33a6e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
@@ -38,7 +38,6 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
/** Creates a new UAX29URLEmailTokenizerFactory */
public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -47,7 +46,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
@Override
public UAX29URLEmailTokenizer create(AttributeFactory factory) {
- UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory);
+ UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(factory);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index a8878ea2139..e47e7f8c55c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SwedishStemmer;
/**
@@ -64,7 +63,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8));
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
@@ -76,18 +75,17 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public SwedishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public SwedishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
- public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public SwedishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
@@ -95,14 +93,12 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
- * @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public SwedishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -119,10 +115,10 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new SwedishStemmer());
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
index 7fcbf471c56..45bd3529015 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
@@ -134,8 +134,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT) : factory.create();
- TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
+ Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create();
+ TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
return new TokenStreamComponents(tokenizer, stream);
}
};
@@ -202,7 +202,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
try {
- Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT);
+ Analyzer analyzer = clazz.getConstructor().newInstance();
if (analyzer instanceof ResourceLoaderAware) {
((ResourceLoaderAware) analyzer).inform(loader);
}
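A consequence of the reflective load above: any analyzer named through the factory's analyzer option must now expose a public no-argument constructor. A hypothetical example of such a class (the class name is illustrative):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;

    public final class MySynonymParsingAnalyzer extends Analyzer {
      public MySynonymParsingAnalyzer() {}   // required by clazz.getConstructor().newInstance()
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();
        TokenStream result = new LowerCaseFilter(source);
        return new TokenStreamComponents(source, result);
      }
    }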
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index 12e27ad2aff..b08e7845e44 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -73,21 +72,18 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words.
- *
- * @param matchVersion lucene compatibility version
*/
- public ThaiAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public ThaiAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
- *
- * @param matchVersion lucene compatibility version
+ *
* @param stopwords a stopword set
*/
- public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) {
- super(matchVersion, stopwords);
+ public ThaiAnalyzer(CharArraySet stopwords) {
+ super(stopwords);
}
/**
@@ -102,17 +98,17 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- if (matchVersion.onOrAfter(Version.LUCENE_4_8)) {
+ if (getVersion().onOrAfter(Version.LUCENE_4_8)) {
final Tokenizer source = new ThaiTokenizer();
- TokenStream result = new LowerCaseFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stopwords);
+ TokenStream result = new LowerCaseFilter(source);
+ result = new StopFilter(result, stopwords);
return new TokenStreamComponents(source, result);
} else {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new ThaiWordFilter(matchVersion, result);
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(result);
+ result = new ThaiWordFilter(result);
+ return new TokenStreamComponents(source, new StopFilter(result, stopwords));
}
}
}
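With matchVersion gone from the constructor, the remaining version gate reads from Analyzer.getVersion(); callers who still need the pre-4.8 chain set it explicitly. A sketch, assuming the setVersion/getVersion pair that accompanies this change:

    import org.apache.lucene.analysis.th.ThaiAnalyzer;
    import org.apache.lucene.util.Version;

    static ThaiAnalyzer legacyThai() {
      ThaiAnalyzer thai = new ThaiAnalyzer();  // defaults to the ThaiTokenizer chain
      thai.setVersion(Version.LUCENE_4_7);     // forces the StandardTokenizer + ThaiWordFilter path
      return thai;
    }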
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
index c387333ff50..7eb1eda5b5a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArrayIterator;
import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
/**
* {@link TokenFilter} that uses {@link java.text.BreakIterator} to break each
@@ -61,7 +60,7 @@ public final class ThaiWordFilter extends TokenFilter {
private boolean hasIllegalOffsets = false; // only if the length changed before this filter
- /** Creates a new ThaiWordFilter with the specified match version. */
+ /** Creates a new ThaiWordFilter. */
- public ThaiWordFilter(Version matchVersion, TokenStream input) {
+ public ThaiWordFilter(TokenStream input) {
super(input);
if (!DBBI_AVAILABLE)
throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
index 699af7bf5a2..154187e2f6a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
@@ -41,7 +41,6 @@ public class ThaiWordFilterFactory extends TokenFilterFactory {
/** Creates a new ThaiWordFilterFactory */
public ThaiWordFilterFactory(Map<String,String> args) {
super(args);
- assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -49,7 +48,7 @@ public class ThaiWordFilterFactory extends TokenFilterFactory {
@Override
public ThaiWordFilter create(TokenStream input) {
- return new ThaiWordFilter(luceneMatchVersion, input);
+ return new ThaiWordFilter(input);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index 0c8842bbfe5..60e08a0063b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -77,33 +77,30 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
- public TurkishAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ public TurkishAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
- *
- * @param matchVersion lucene compatibility version
+ *
* @param stopwords a stopword set
*/
- public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords) {
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ public TurkishAnalyzer(CharArraySet stopwords) {
+ this(stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
* provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
- *
- * @param matchVersion lucene compatibility version
+ *
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
- public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
- super(matchVersion, stopwords);
- this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
- matchVersion, stemExclusionSet));
+ public TurkishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+ super(stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}
/**
@@ -120,14 +117,16 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
- final Tokenizer source = new StandardTokenizer(matchVersion);
- TokenStream result = new StandardFilter(matchVersion, source);
- if(matchVersion.onOrAfter(Version.LUCENE_4_8))
+ final Tokenizer source = new StandardTokenizer();
+ TokenStream result = new StandardFilter(source);
+ if (getVersion().onOrAfter(Version.LUCENE_4_8)) {
result = new ApostropheFilter(result);
+ }
result = new TurkishLowerCaseFilter(result);
- result = new StopFilter(matchVersion, result, stopwords);
- if(!stemExclusionSet.isEmpty())
+ result = new StopFilter(result, stopwords);
+ if (!stemExclusionSet.isEmpty()) {
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+ }
result = new SnowballFilter(result, new TurkishStemmer());
return new TokenStreamComponents(source, result);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
index 5234440d0ac..325e5dbad22 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
@@ -238,12 +238,10 @@ public abstract class AbstractAnalysisFactory {
if (files.size() > 0) {
// default stopwords list has 35 or so words, but maybe don't make it that
// big to start
- words = new CharArraySet(luceneMatchVersion,
- files.size() * 10, ignoreCase);
+ words = new CharArraySet(files.size() * 10, ignoreCase);
for (String file : files) {
List<String> wlist = getLines(loader, file.trim());
- words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
- ignoreCase));
+ words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
}
}
return words;
@@ -266,8 +264,7 @@ public abstract class AbstractAnalysisFactory {
if (files.size() > 0) {
// default stopwords list has 35 or so words, but maybe don't make it that
// big to start
- words = new CharArraySet(luceneMatchVersion,
- files.size() * 10, ignoreCase);
+ words = new CharArraySet(files.size() * 10, ignoreCase);
for (String file : files) {
InputStream stream = null;
Reader reader = null;
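For reference, the replacement calls in this hunk pair the plain (capacity, ignoreCase) CharArraySet constructor with the Version-free StopFilter.makeStopSet. A small sketch with hypothetical word content:

    import java.util.Arrays;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.util.CharArraySet;

    static CharArraySet buildStopSet() {
      CharArraySet words = new CharArraySet(10, true);  // initial capacity, ignoreCase
      words.addAll(StopFilter.makeStopSet(Arrays.asList("a", "an", "the"), true));
      return words;
    }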
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
index f867cf7ea88..7529d93d8df 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
@@ -25,8 +25,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.Version;
-
/**
* A simple class that stores key Strings as char[]'s in a
@@ -36,19 +34,6 @@ import org.apache.lucene.util.Version;
* etc. It is designed to be quick to retrieve items
* by char[] keys without the necessity of converting
* to a String first.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating {@link CharArrayMap}:
- * <ul>
- *   <li> As of 3.1, supplementary characters are
- *        properly lowercased.
- * </ul>
- * Before 3.1 supplementary characters could not be
- * lowercased correctly due to the lack of Unicode 4
- * support in JDK 1.4. To use instances of
- * {@link CharArrayMap} with the behavior before Lucene
- * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
*/
public class CharArrayMap<V> extends AbstractMap<Object,V> {