LUCENE-3765: Trappy behavior with StopFilter/ignoreCase

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242497 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-02-09 19:59:50 +00:00
parent ef65f76824
commit 72ae3171be
89 changed files with 363 additions and 535 deletions

View File

@ -779,6 +779,11 @@ API Changes
to be merged. To mimic the old behaviour, just use IndexReader.directory()
for choosing the provider by Directory. (Uwe Schindler)
* LUCENE-3765: Deprecated StopFilter ctor that took ignoreCase, because
in some cases (if the set is a CharArraySet), the argument is ignored.
Deprecated StandardAnalyzer and ClassicAnalyzer ctors that take File,
please use the Reader ctor instead. (Robert Muir)
New Features
* LUCENE-3593: Added a FieldValueFilter that accepts all documents that either

View File

@ -218,6 +218,10 @@ Bug Fixes
* LUCENE-3719: FVH: slow performance on very large queries.
(Igor Motov via Koji Sekiguchi)
* LUCENE-3765: As of Version.LUCENE_36, DutchAnalyzer's two ctors
that take stopwords and stem exclusion tables also initialize
the default stem overrides (e.g. kind/kinder, fiets). (Robert Muir)
Documentation
* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly

View File

@ -29,6 +29,11 @@ API Changes
since they prevent reuse. Both Analyzers should be configured at instantiation.
(Chris Male)
* LUCENE-3765: Stopset ctors that previously took Set<?> or Map<?,String> now take
CharArraySet and CharArrayMap respectively. Previously the behavior was confusing,
and sometimes different depending on the type of set, and ultimately a CharArraySet
or CharArrayMap was always used anyway. (Robert Muir)
New Features
* LUCENE-2341: A new analyzer/filter: Morfologik - a dictionary-driven lemmatizer

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ar;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -63,7 +62,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -72,7 +71,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -85,7 +84,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
}
}
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
@ -102,7 +101,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public ArabicAnalyzer(Version matchVersion, Set<?> stopwords){
public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -118,7 +117,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
public ArabicAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet){
public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -56,7 +56,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
*
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet() {
public static CharArraySet getDefaultStopSet() {
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -65,7 +65,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
* class accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -78,7 +78,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
}
}
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* Builds an analyzer with the default stop words:
@ -91,7 +91,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the given stop words.
*/
public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords) {
public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -100,7 +100,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
* If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerFilter}
* before {@link BulgarianStemFilter}.
*/
public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet)); }

View File

@ -19,8 +19,6 @@ package org.apache.lucene.analysis.br;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -56,12 +54,12 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -79,7 +77,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
/**
* Contains words that should be indexed but not stemmed.
*/
private Set<?> excltable = Collections.emptySet();
private CharArraySet excltable = CharArraySet.EMPTY_SET;
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
@ -96,7 +94,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords) {
public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) {
super(matchVersion, stopwords);
}
@ -108,8 +106,8 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords,
Set<?> stemExclusionSet) {
public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords,
CharArraySet stemExclusionSet) {
this(matchVersion, stopwords);
excltable = CharArraySet.unmodifiableSet(CharArraySet
.copy(matchVersion, stemExclusionSet));

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ca;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -49,7 +48,7 @@ import org.tartarus.snowball.ext.CatalanStemmer;
* </ul>
*/
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Catalan stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -64,7 +63,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -73,7 +72,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -100,7 +99,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public CatalanAnalyzer(Version matchVersion, Set<?> stopwords) {
public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -113,7 +112,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public CatalanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -27,6 +26,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@ -49,12 +49,12 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -82,7 +82,7 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public CJKAnalyzer(Version matchVersion, Set<?> stopwords){
public CJKAnalyzer(Version matchVersion, CharArraySet stopwords){
super(matchVersion, stopwords);
}

View File

@ -10,7 +10,6 @@
package org.apache.lucene.analysis.commongrams;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -69,35 +68,9 @@ public final class CommonGramsFilter extends TokenFilter {
* @param input TokenStream input in filter chain
* @param commonWords The set of common words.
*/
public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords) {
this(matchVersion, input, commonWords, false);
}
/**
* Construct a token stream filtering the given input using a Set of common
* words to create bigrams, case-sensitive if ignoreCase is false (unless Set
* is CharArraySet). If <code>commonWords</code> is an instance of
* {@link CharArraySet} (true if <code>makeCommonSet()</code> was used to
* construct the set) it will be directly used and <code>ignoreCase</code>
* will be ignored since <code>CharArraySet</code> directly controls case
* sensitivity.
* <p/>
* If <code>commonWords</code> is not an instance of {@link CharArraySet}, a
* new CharArraySet will be constructed and <code>ignoreCase</code> will be
* used to specify the case sensitivity of that set.
*
* @param input TokenStream input in filter chain.
* @param commonWords The set of common words.
* @param ignoreCase -Ignore case when constructing bigrams for common words.
*/
public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords, boolean ignoreCase) {
public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) {
super(input);
if (commonWords instanceof CharArraySet) {
this.commonWords = (CharArraySet) commonWords;
} else {
this.commonWords = new CharArraySet(matchVersion, commonWords.size(), ignoreCase);
this.commonWords.addAll(commonWords);
}
this.commonWords = commonWords;
}
/**

View File

@ -18,10 +18,7 @@ package org.apache.lucene.analysis.compound;
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
@ -43,13 +40,6 @@ import org.apache.lucene.util.Version;
* supplementary characters in strings and char arrays provided as compound word
* dictionaries.
* </ul>
* <p>If you pass in a {@link org.apache.lucene.analysis.util.CharArraySet} as dictionary,
* it should be case-insensitive unless it contains only lowercased entries and you
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
* {@link Set Sets} to the ctors, they will be automatically
* transformed to case-insensitive!
*/
public abstract class CompoundWordTokenFilterBase extends TokenFilter {
/**
@ -80,15 +70,15 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
private AttributeSource.State current;
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set<?> dictionary, boolean onlyLongestMatch) {
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) {
this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
}
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set<?> dictionary) {
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) {
this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
}
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, Set<?> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(input);
this.tokens=new LinkedList<CompoundToken>();
@ -96,12 +86,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
this.minSubwordSize=minSubwordSize;
this.maxSubwordSize=maxSubwordSize;
this.onlyLongestMatch=onlyLongestMatch;
if (dictionary==null || dictionary instanceof CharArraySet) {
this.dictionary = (CharArraySet) dictionary;
} else {
this.dictionary = new CharArraySet(matchVersion, dictionary, true);
}
this.dictionary = dictionary;
}
@Override

View File

@ -22,6 +22,7 @@ import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
@ -38,13 +39,6 @@ import org.apache.lucene.util.Version;
* supplementary characters in strings and char arrays provided as compound word
* dictionaries.
* </ul>
* <p>If you pass in a {@link org.apache.lucene.analysis.util.CharArraySet} as dictionary,
* it should be case-insensitive unless it contains only lowercased entries and you
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
* {@link Set Sets} to the ctors, they will be automatically
* transformed to case-insensitive!
*/
public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
@ -61,7 +55,7 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
* @param dictionary
* the word dictionary to match against.
*/
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, Set<?> dictionary) {
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) {
super(matchVersion, input, dictionary);
}
@ -86,7 +80,7 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
* @param onlyLongestMatch
* Add only the longest matching subword to the stream
*/
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, Set<?> dictionary,
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary,
int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}

View File

@ -18,12 +18,12 @@ package org.apache.lucene.analysis.compound;
*/
import java.io.File;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;
@ -41,13 +41,6 @@ import org.xml.sax.InputSource;
* supplementary characters in strings and char arrays provided as compound word
* dictionaries.
* </ul>
* <p>If you pass in a {@link org.apache.lucene.analysis.util.CharArraySet} as dictionary,
* it should be case-insensitive unless it contains only lowercased entries and you
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
* {@link Set Sets} to the ctors, they will be automatically
* transformed to case-insensitive!
*/
public class HyphenationCompoundWordTokenFilter extends
CompoundWordTokenFilterBase {
@ -69,7 +62,7 @@ public class HyphenationCompoundWordTokenFilter extends
* the word dictionary to match against.
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set<?> dictionary) {
HyphenationTree hyphenator, CharArraySet dictionary) {
this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
@ -98,7 +91,7 @@ public class HyphenationCompoundWordTokenFilter extends
* Add only the longest matching subword to the stream
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
@ -109,14 +102,14 @@ public class HyphenationCompoundWordTokenFilter extends
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
* <p>
* Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, Set, int, int, int, boolean)
* Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
* HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* null, minWordSize, minSubwordSize, maxSubwordSize }
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
int maxSubwordSize) {
this(matchVersion, input, hyphenator, (Set<?>) null, minWordSize, minSubwordSize,
this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
maxSubwordSize, false);
}

View File

@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
@ -46,7 +45,7 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are not usually useful
for searching.*/
public static final Set<?> ENGLISH_STOP_WORDS_SET;
public static final CharArraySet ENGLISH_STOP_WORDS_SET;
static {
final List<String> stopWords = Arrays.asList(
@ -72,7 +71,7 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
/** Builds an analyzer with the stop words from the given set.
* @param matchVersion See <a href="#version">above</a>
* @param stopWords Set of stop words */
public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
public StopAnalyzer(Version matchVersion, CharArraySet stopWords) {
super(matchVersion, stopWords);
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -45,34 +44,6 @@ public final class StopFilter extends FilteringTokenFilter {
private final CharArraySet stopWords;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Construct a token stream filtering the given input. If
* <code>stopWords</code> is an instance of {@link CharArraySet} (true if
* <code>makeStopSet()</code> was used to construct the set) it will be
* directly used and <code>ignoreCase</code> will be ignored since
* <code>CharArraySet</code> directly controls case sensitivity.
* <p/>
* If <code>stopWords</code> is not an instance of {@link CharArraySet}, a new
* CharArraySet will be constructed and <code>ignoreCase</code> will be used
* to specify the case sensitivity of that set.
*
* @param matchVersion
* Lucene version to enable correct Unicode 4.0 behavior in the stop
* set if Version > 3.0. See <a href="#version">above</a> for details.
* @param input
* Input TokenStream
* @param stopWords
* A Set of Strings or char[] or any other toString()-able set
* representing the stopwords
* @param ignoreCase
* if true, all words are lower cased first
*/
public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
{
super(true, input);
this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
}
/**
* Constructs a filter which removes words from the input TokenStream that are
* named in the Set.
@ -83,12 +54,12 @@ public final class StopFilter extends FilteringTokenFilter {
* @param in
* Input stream
* @param stopWords
* A Set of Strings or char[] or any other toString()-able set
* representing the stopwords
* A {@link CharArraySet} representing the stopwords.
* @see #makeStopSet(Version, java.lang.String...)
*/
public StopFilter(Version matchVersion, TokenStream in, Set<?> stopWords) {
this(matchVersion, in, stopWords, false);
public StopFilter(Version matchVersion, TokenStream in, CharArraySet stopWords) {
super(true, in);
this.stopWords = stopWords;
}
/**
@ -101,7 +72,7 @@ public final class StopFilter extends FilteringTokenFilter {
* @param stopWords An array of stopwords
* @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
*/
public static Set<Object> makeStopSet(Version matchVersion, String... stopWords) {
public static CharArraySet makeStopSet(Version matchVersion, String... stopWords) {
return makeStopSet(matchVersion, stopWords, false);
}
@ -116,7 +87,7 @@ public final class StopFilter extends FilteringTokenFilter {
* @return A Set ({@link CharArraySet}) containing the words
* @see #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase
*/
public static Set<Object> makeStopSet(Version matchVersion, List<?> stopWords) {
public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords) {
return makeStopSet(matchVersion, stopWords, false);
}
@ -128,7 +99,7 @@ public final class StopFilter extends FilteringTokenFilter {
* @param ignoreCase If true, all words are lower cased first.
* @return a Set containing the words
*/
public static Set<Object> makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
public static CharArraySet makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
@ -141,7 +112,7 @@ public final class StopFilter extends FilteringTokenFilter {
* @param ignoreCase if true, all words are lower cased first
* @return A Set ({@link CharArraySet}) containing the words
*/
public static Set<Object> makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
public static CharArraySet makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase){
CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
stopSet.addAll(stopWords);
return stopSet;

View File

@ -32,7 +32,6 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.*;
import java.util.Set;
/**
* {@link Analyzer} for Czech language.
@ -62,12 +61,12 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
*
* @return a set of default Czech-stopwords
*/
public static final Set<?> getDefaultStopSet(){
public static final CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
private static final Set<?> DEFAULT_SET;
private static final CharArraySet DEFAULT_SET;
static {
try {
@ -82,7 +81,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
}
private final Set<?> stemExclusionTable;
private final CharArraySet stemExclusionTable;
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
@ -101,7 +100,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* <a href="#version">above</a>
* @param stopwords a stopword set
*/
public CzechAnalyzer(Version matchVersion, Set<?> stopwords) {
public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -114,7 +113,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionTable a stemming exclusion set
*/
public CzechAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable) {
public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) {
super(matchVersion, stopwords);
this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
}
@ -129,7 +128,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
* a version is >= LUCENE_31 and a stem exclusion set is provided via
* {@link #CzechAnalyzer(Version, Set, Set)} a
* {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a
* {@link KeywordMarkerFilter} is added before
* {@link CzechStemFilter}.
*/

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.da;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -41,7 +40,7 @@ import org.tartarus.snowball.ext.DanishStemmer;
* {@link Analyzer} for Danish.
*/
public final class DanishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Danish stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "danish_stop.txt";
@ -50,7 +49,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -59,7 +58,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -86,7 +85,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public DanishAnalyzer(Version matchVersion, Set<?> stopwords) {
public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -99,7 +98,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public DanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -21,7 +21,6 @@ package org.apache.lucene.analysis.de;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -90,16 +89,16 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
* Returns a set of default German-stopwords
* @return a set of default German-stopwords
*/
public static final Set<?> getDefaultStopSet(){
public static final CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
/** @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) */
@Deprecated
private static final Set<?> DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
private static final CharArraySet DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
private static final Set<?> DEFAULT_SET;
private static final CharArraySet DEFAULT_SET;
static {
try {
DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
@ -119,7 +118,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
/**
* Contains words that should be indexed but not stemmed.
*/
private final Set<?> exclusionSet;
private final CharArraySet exclusionSet;
/**
* Builds an analyzer with the default stop words:
@ -139,7 +138,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public GermanAnalyzer(Version matchVersion, Set<?> stopwords) {
public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -153,7 +152,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
* @param stemExclusionSet
* a stemming exclusion set
*/
public GermanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -27,6 +26,7 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@ -58,12 +58,12 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
* Returns a set of default Greek-stopwords
* @return a set of default Greek-stopwords
*/
public static final Set<?> getDefaultStopSet(){
public static final CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
private static final Set<?> DEFAULT_SET;
private static final CharArraySet DEFAULT_SET;
static {
try {
@ -95,7 +95,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
* See <a href="#version">above</a>
* @param stopwords a stopword set
*/
public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) {
super(matchVersion, stopwords);
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.en;
*/
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -37,13 +36,13 @@ import org.apache.lucene.util.Version;
* {@link Analyzer} for English.
*/
public final class EnglishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -52,7 +51,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
static final CharArraySet DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
}
/**
@ -68,7 +67,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public EnglishAnalyzer(Version matchVersion, Set<?> stopwords) {
public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -81,7 +80,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.es;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -48,7 +47,7 @@ import org.tartarus.snowball.ext.SpanishStemmer;
* </ul>
*/
public final class SpanishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Spanish stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "spanish_stop.txt";
@ -57,7 +56,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -66,7 +65,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -93,7 +92,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public SpanishAnalyzer(Version matchVersion, Set<?> stopwords) {
public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -106,7 +105,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public SpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.eu;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -39,7 +38,7 @@ import org.tartarus.snowball.ext.BasqueStemmer;
* {@link Analyzer} for Basque.
*/
public final class BasqueAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Basque stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -48,7 +47,7 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -57,7 +56,7 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -84,7 +83,7 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public BasqueAnalyzer(Version matchVersion, Set<?> stopwords) {
public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -97,7 +96,7 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public BasqueAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fa;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader;
@ -30,6 +29,7 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@ -63,7 +63,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -72,7 +72,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -101,7 +101,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public PersianAnalyzer(Version matchVersion, Set<?> stopwords){
public PersianAnalyzer(Version matchVersion, CharArraySet stopwords){
super(matchVersion, stopwords);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fi;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -41,7 +40,7 @@ import org.tartarus.snowball.ext.FinnishStemmer;
* {@link Analyzer} for Finnish.
*/
public final class FinnishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Finnish stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "finnish_stop.txt";
@ -50,7 +49,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -59,7 +58,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -86,7 +85,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public FinnishAnalyzer(Version matchVersion, Set<?> stopwords) {
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -99,7 +98,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public FinnishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,11 +19,9 @@ package org.apache.lucene.analysis.fr;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@ -56,7 +54,7 @@ public final class ElisionFilter extends TokenFilter {
* @param input the source {@link TokenStream}
* @param articles a set of stopword articles
*/
public ElisionFilter(Version matchVersion, TokenStream input, Set<?> articles) {
public ElisionFilter(Version matchVersion, TokenStream input, CharArraySet articles) {
super(input);
this.articles = CharArraySet.unmodifiableSet(
new CharArraySet(matchVersion, articles, true));

View File

@ -36,7 +36,6 @@ import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
/**
* {@link Analyzer} for French language.
@ -101,23 +100,23 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/**
* Contains words that should be indexed but not stemmed.
*/
private final Set<?> excltable;
private final CharArraySet excltable;
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
/** @deprecated (3.1) remove this in Lucene 5.0, index bw compat */
@Deprecated
static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
static final CharArraySet DEFAULT_STOP_SET_30 = CharArraySet
.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
false));
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
@ -147,7 +146,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public FrenchAnalyzer(Version matchVersion, Set<?> stopwords){
public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -161,8 +160,8 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
* @param stemExclutionSet
* a stemming exclusion set
*/
public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
Set<?> stemExclutionSet) {
public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords,
CharArraySet stemExclutionSet) {
super(matchVersion, stopwords);
this.excltable = CharArraySet.unmodifiableSet(CharArraySet
.copy(matchVersion, stemExclutionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.gl;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -39,7 +38,7 @@ import org.apache.lucene.util.Version;
* {@link Analyzer} for Galician.
*/
public final class GalicianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Galician stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -48,7 +47,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -57,7 +56,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -84,7 +83,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public GalicianAnalyzer(Version matchVersion, Set<?> stopwords) {
public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -97,7 +96,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public GalicianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.hi;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -44,7 +43,7 @@ import org.apache.lucene.util.Version;
* </ul>
*/
public final class HindiAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* File containing default Hindi stopwords.
@ -59,7 +58,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -68,7 +67,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -88,7 +87,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a stemming exclusion set
*/
public HindiAnalyzer(Version version, Set<?> stopwords, Set<?> stemExclusionSet) {
public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(version, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(
CharArraySet.copy(matchVersion, stemExclusionSet));
@ -100,7 +99,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
* @param version lucene compatibility version
* @param stopwords a stopword set
*/
public HindiAnalyzer(Version version, Set<?> stopwords) {
public HindiAnalyzer(Version version, CharArraySet stopwords) {
this(version, stopwords, CharArraySet.EMPTY_SET);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.hu;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -41,7 +40,7 @@ import org.tartarus.snowball.ext.HungarianStemmer;
* {@link Analyzer} for Hungarian.
*/
public final class HungarianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Hungarian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";
@ -50,7 +49,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -59,7 +58,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -86,7 +85,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public HungarianAnalyzer(Version matchVersion, Set<?> stopwords) {
public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -99,7 +98,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public HungarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.hy;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -39,7 +38,7 @@ import org.tartarus.snowball.ext.ArmenianStemmer;
* {@link Analyzer} for Armenian.
*/
public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Armenian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -48,7 +47,7 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -57,7 +56,7 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -84,7 +83,7 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords) {
public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -97,7 +96,7 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.id;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
@ -43,7 +42,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -52,7 +51,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -65,7 +64,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
}
}
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
@ -82,7 +81,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords
* a stopword set
*/
public IndonesianAnalyzer(Version matchVersion, Set<?> stopwords){
public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -98,7 +97,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
* @param stemExclusionSet
* a set of terms not to be stemmed
*/
public IndonesianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet){
public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.it;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -52,7 +51,7 @@ import org.tartarus.snowball.ext.ItalianStemmer;
* </ul>
*/
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Italian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
@ -68,7 +67,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -77,7 +76,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -104,7 +103,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public ItalianAnalyzer(Version matchVersion, Set<?> stopwords) {
public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -117,7 +116,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public ItalianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.lv;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -27,7 +26,6 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
@ -40,7 +38,7 @@ import org.apache.lucene.util.Version;
* {@link Analyzer} for Latvian.
*/
public final class LatvianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Latvian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -49,7 +47,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -58,7 +56,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -85,7 +83,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public LatvianAnalyzer(Version matchVersion, Set<?> stopwords) {
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -98,7 +96,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public LatvianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -18,14 +18,12 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
* Marks terms as keywords via the {@link KeywordAttribute}. Each token
@ -50,27 +48,11 @@ public final class KeywordMarkerFilter extends TokenFilter {
* @param keywordSet
* the keyword set in which to look up the current term buffer
*/
public KeywordMarkerFilter(final TokenStream in,
final CharArraySet keywordSet) {
public KeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
super(in);
this.keywordSet = keywordSet;
}
/**
* Create a new KeywordMarkerFilter, that marks the current token as a
* keyword if the tokens term buffer is contained in the given set via the
* {@link KeywordAttribute}.
*
* @param in
* TokenStream to filter
* @param keywordSet
* the keywords set to lookup the current termbuffer
*/
public KeywordMarkerFilter(final TokenStream in, final Set<?> keywordSet) {
this(in, keywordSet instanceof CharArraySet ? (CharArraySet) keywordSet
: CharArraySet.copy(Version.LUCENE_31, keywordSet));
}
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {

View File

@ -22,7 +22,6 @@ import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -139,7 +138,7 @@ public final class PatternAnalyzer extends Analyzer {
private final Pattern pattern;
private final boolean toLowerCase;
private final Set<?> stopWords;
private final CharArraySet stopWords;
private final Version matchVersion;
@ -162,7 +161,7 @@ public final class PatternAnalyzer extends Analyzer {
* or <a href="http://www.unine.ch/info/clef/">other stop words
* lists </a>.
*/
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set<?> stopWords) {
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, CharArraySet stopWords) {
if (pattern == null)
throw new IllegalArgumentException("pattern must not be null");
@ -404,12 +403,12 @@ public final class PatternAnalyzer extends Analyzer {
private int pos;
private final boolean isLetter;
private final boolean toLowerCase;
private final Set<?> stopWords;
private final CharArraySet stopWords;
private static final Locale locale = Locale.getDefault();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public FastStringTokenizer(Reader input, String str, boolean isLetter, boolean toLowerCase, Set<?> stopWords) {
public FastStringTokenizer(Reader input, String str, boolean isLetter, boolean toLowerCase, CharArraySet stopWords) {
super(input);
this.str = str;
this.isLetter = isLetter;

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -46,10 +45,9 @@ public final class StemmerOverrideFilter extends TokenFilter {
* </p>
*/
public StemmerOverrideFilter(Version matchVersion, TokenStream input,
Map<?,String> dictionary) {
CharArrayMap<String> dictionary) {
super(input);
this.dictionary = dictionary instanceof CharArrayMap ?
(CharArrayMap<String>) dictionary : CharArrayMap.copy(matchVersion, dictionary);
this.dictionary = CharArrayMap.copy(matchVersion, dictionary);
}
@Override

View File

@ -28,18 +28,14 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Set;
import java.util.Map;
/**
* {@link Analyzer} for Dutch language.
@ -56,6 +52,9 @@ import java.util.Map;
* <p>You must specify the required {@link Version}
* compatibility when creating DutchAnalyzer:
* <ul>
* <li> As of 3.6, {@link #DutchAnalyzer(Version, CharArraySet)} and
* {@link #DutchAnalyzer(Version, CharArraySet, CharArraySet)} also populate
* the default entries for the stem override dictionary
* <li> As of 3.1, Snowball stemming is done with SnowballFilter,
* LowerCaseFilter is used prior to StopFilter, and Snowball
* stopwords are used by default.
@ -75,13 +74,13 @@ public final class DutchAnalyzer extends Analyzer {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static final CharArrayMap<String> DEFAULT_STEM_DICT;
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
@ -91,6 +90,12 @@ public final class DutchAnalyzer extends Analyzer {
// distribution (JAR)
throw new RuntimeException("Unable to load default stopword set");
}
DEFAULT_STEM_DICT = new CharArrayMap<String>(Version.LUCENE_CURRENT, 4, false);
DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
DEFAULT_STEM_DICT.put("ei", "eier");
DEFAULT_STEM_DICT.put("kind", "kinder");
}
}
@ -98,14 +103,14 @@ public final class DutchAnalyzer extends Analyzer {
/**
* Contains the stopwords used with the StopFilter.
*/
private final Set<?> stoptable;
private final CharArraySet stoptable;
/**
* Contains words that should be indexed but not stemmed.
*/
private Set<?> excltable = Collections.emptySet();
private CharArraySet excltable = CharArraySet.EMPTY_SET;
private final Map<String, String> stemdict = new HashMap<String, String>();
private final CharArrayMap<String> stemdict;
private final Version matchVersion;
/**
@ -114,21 +119,33 @@ public final class DutchAnalyzer extends Analyzer {
*
*/
public DutchAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
stemdict.put("fiets", "fiets"); //otherwise fiet
stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
stemdict.put("ei", "eier");
stemdict.put("kind", "kinder");
// historically, only this ctor populated the stem dict!!!!!
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
public DutchAnalyzer(Version matchVersion, Set<?> stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
/**
* Builds an analyzer with the given stop words and an empty stem exclusion set.
* <p>
* The default stem override dictionary is used only when
* <code>matchVersion</code> is on or after {@link Version#LUCENE_36}; for
* earlier versions an empty override map is passed instead.
*
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
// historically, this ctor never populated the stem dict,
// so we populate it only for >= 3.6
this(matchVersion, stopwords, CharArraySet.EMPTY_SET,
matchVersion.onOrAfter(Version.LUCENE_36)
? DefaultSetHolder.DEFAULT_STEM_DICT
: CharArrayMap.<String>emptyMap());
}
public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
/**
* Builds an analyzer with the given stop words and stem exclusion set.
* <p>
* The default stem override dictionary is used only when
* <code>matchVersion</code> is on or after {@link Version#LUCENE_36}; for
* earlier versions an empty override map is passed instead.
*
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionTable a set of terms not to be stemmed
*/
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
// historically, this ctor never populated the stem dict,
// so we populate it only for >= 3.6
this(matchVersion, stopwords, stemExclusionTable,
matchVersion.onOrAfter(Version.LUCENE_36)
? DefaultSetHolder.DEFAULT_STEM_DICT
: CharArrayMap.<String>emptyMap());
}
/**
* Builds an analyzer with the given stop words, stem exclusion set and stem
* override dictionary. The other constructors visible here delegate to this one.
* <p>
* Each collection is passed through <code>copy</code> and the result is stored
* wrapped as an unmodifiable set/map.
*
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
* @param stemExclusionTable a set of terms not to be stemmed
* @param stemOverrideDict a map of terms to the stems that should replace them
*/
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
this.matchVersion = matchVersion;
this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
this.stemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
}
/**

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.no;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -41,7 +40,7 @@ import org.tartarus.snowball.ext.NorwegianStemmer;
* {@link Analyzer} for Norwegian.
*/
public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Norwegian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";
@ -50,7 +49,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -59,7 +58,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -86,7 +85,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords) {
public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -99,7 +98,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.pt;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -48,7 +47,7 @@ import org.tartarus.snowball.ext.PortugueseStemmer;
* </ul>
*/
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Portuguese stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "portuguese_stop.txt";
@ -57,7 +56,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -66,7 +65,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -93,7 +92,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords) {
public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -106,7 +105,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -22,6 +22,7 @@ import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
@ -179,7 +180,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
if (stopWords == null) {
return components;
}
StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(),
new CharArraySet(matchVersion, stopWords, false));
return new TokenStreamComponents(components.getTokenizer(), stopFilter);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ro;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -39,7 +38,7 @@ import org.tartarus.snowball.ext.RomanianStemmer;
* {@link Analyzer} for Romanian.
*/
public final class RomanianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Romanian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -53,7 +52,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -62,7 +61,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -89,7 +88,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public RomanianAnalyzer(Version matchVersion, Set<?> stopwords) {
public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -102,7 +101,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@ -78,10 +77,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
private static class DefaultSetHolder {
/** @deprecated (3.1) remove this for Lucene 5.0 */
@Deprecated
static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
static final CharArraySet DEFAULT_STOP_SET_30 = CharArraySet
.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -95,14 +94,14 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
}
}
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/**
* Returns an unmodifiable instance of the default stop-words set.
*
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet() {
public static CharArraySet getDefaultStopSet() {
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -120,7 +119,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
* @param stopwords
* a stopword set
*/
public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
public RussianAnalyzer(Version matchVersion, CharArraySet stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -133,7 +132,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
* a stopword set
* @param stemExclusionSet a set of words not to be stemmed
*/
public RussianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet){
public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet){
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
}

View File

@ -27,7 +27,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import java.io.Reader;
import java.util.Set;
/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
@ -48,7 +47,7 @@ import java.util.Set;
@Deprecated
public final class SnowballAnalyzer extends Analyzer {
private String name;
private Set<?> stopSet;
private CharArraySet stopSet;
private final Version matchVersion;
/** Builds the named analyzer with no stop words. */
@ -58,7 +57,7 @@ public final class SnowballAnalyzer extends Analyzer {
}
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
public SnowballAnalyzer(Version matchVersion, String name, CharArraySet stopWords) {
this(matchVersion, name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
stopWords));

View File

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
@ -28,7 +29,6 @@ import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
/**
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
@ -60,13 +60,13 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match; see
* <a href="#version">above</a>
* @param stopWords stop words */
public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) {
super(matchVersion, stopWords);
}
@ -79,15 +79,6 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
this(matchVersion, STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match; see
* <a href="#version">above</a>
* @param stopwords File to read stop words from */
public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link

View File

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
@ -28,7 +29,6 @@ import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
/**
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
@ -61,13 +61,13 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match; see
* <a href="#version">above</a>
* @param stopWords stop words */
public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) {
super(matchVersion, stopWords);
}
@ -80,15 +80,6 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
this(matchVersion, STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match; see
* <a href="#version">above</a>
* @param stopwords File to read stop words from */
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.sv;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -41,7 +40,7 @@ import org.tartarus.snowball.ext.SwedishStemmer;
* {@link Analyzer} for Swedish.
*/
public final class SwedishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Swedish stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "swedish_stop.txt";
@ -50,7 +49,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -59,7 +58,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -86,7 +85,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public SwedishAnalyzer(Version matchVersion, Set<?> stopwords) {
public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -99,7 +98,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public SwedishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.th;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -28,6 +27,7 @@ import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
@ -55,7 +55,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -64,7 +64,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.;
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -93,7 +93,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public ThaiAnalyzer(Version matchVersion, Set<?> stopwords) {
public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) {
super(matchVersion, stopwords);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.tr;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
@ -38,7 +37,7 @@ import org.tartarus.snowball.ext.TurkishStemmer;
* {@link Analyzer} for Turkish.
*/
public final class TurkishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
/** File containing default Turkish stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -52,7 +51,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -61,7 +60,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@ -88,7 +87,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public TurkishAnalyzer(Version matchVersion, Set<?> stopwords) {
public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -101,7 +100,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public TurkishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.util;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.IOUtils;
@ -46,7 +45,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer {
* @return the analyzer's stopword set or an empty set if the analyzer has no
* stopwords
*/
public Set<?> getStopwordSet() {
public CharArraySet getStopwordSet() {
return stopwords;
}
@ -58,7 +57,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer {
* @param stopwords
* the analyzer's stopword set
*/
protected StopwordAnalyzerBase(final Version version, final Set<?> stopwords) {
protected StopwordAnalyzerBase(final Version version, final CharArraySet stopwords) {
matchVersion = version;
// analyzers should use char array set for stopwords!
this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet

View File

@ -18,9 +18,6 @@ package org.apache.lucene.analysis.ar;
*/
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
@ -79,16 +76,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
Set<String> set = new HashSet<String>();
Collections.addAll(set, "the", "and", "a");
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
public void testWithStemExclusionSet() throws IOException {
Set<String> set = new HashSet<String>();
set.add("ساهدهات");
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.bg;
*/
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -43,8 +42,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
}
public void testCustomStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
.emptySet());
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "Как се казваш?",
new String[] {"как", "се", "казваш"});
}

View File

@ -136,7 +136,8 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}
public void testStemExclusionTable() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("quintessência"));
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT,
CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("quintessência"), false));
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ca;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -50,8 +49,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("llengües");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false);
Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT,
CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "llengües", "llengües");

View File

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@ -58,10 +59,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
}
public void testStopList() throws IOException {
Set<Object> stopWordsSet = new HashSet<Object>();
stopWordsSet.add("good");
stopWordsSet.add("test");
stopWordsSet.add("analyzer");
CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
@ -75,10 +73,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
}
public void testStopListPositions() throws IOException {
Set<Object> stopWordsSet = new HashSet<Object>();
stopWordsSet.add("good");
stopWordsSet.add("test");
stopWordsSet.add("analyzer");
CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.English;
import org.apache.lucene.util.Version;
@ -36,22 +37,15 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet("is", "the", "Time");
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("is", "the", "Time"), false);
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords);
assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet( "is", "the", "Time" );
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
assertTokenStreamContents(stream, new String[] { "Now" });
}
public void testStopFilt() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
@ -70,7 +64,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
log(sb.toString());
String stopWords[] = a.toArray(new String[0]);
for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
@ -93,8 +87,8 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
String stopWords1[] = a1.toArray(new String[0]);
for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
CharArraySet stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
CharArraySet stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
reader = new StringReader(sb.toString());
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.da;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("undersøgelse");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false);
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
DanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "undersøgelse", "undersøgelse");

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.de;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -46,7 +45,8 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
}
public void testStemExclusionTable() throws Exception {
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("tischen"));
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET,
new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false));
checkOneTermReuse(a, "tischen", "tischen");
}

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.en;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -45,8 +44,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("books");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false);
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "books", "books");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.es;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("chicano");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false);
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "chicana", "chican");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.eu;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("zaldiak");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false);
Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT,
BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "zaldiak", "zaldiak");

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.fa;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test the Persian Analyzer
@ -215,7 +216,8 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, asSet("the", "and", "a"));
PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT,
new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false));
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.fi;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("edeltäjistään");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");

View File

@ -20,15 +20,14 @@ package org.apache.lucene.analysis.fr;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
/**
*
@ -38,9 +37,7 @@ public class TestElision extends BaseTokenStreamTestCase {
public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
Set<String> articles = new HashSet<String>();
articles.add("l");
articles.add("M");
CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
List<String> tas = filter(filter);
assertEquals("embrouille", tas.get(4));

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.gl;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("correspondente");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false);
Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT,
GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "correspondente", "correspondente");

View File

@ -1,10 +1,8 @@
package org.apache.lucene.analysis.hi;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -41,8 +39,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
}
public void testExclusionSet() throws Exception {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("हिंदी");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false);
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hu;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("babakocsi");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "babakocsi", "babakocsi");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hy;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("արծիվներ");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false);
Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT,
ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "արծիվներ", "արծիվներ");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.id;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("peledakan");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false);
Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT,
IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "peledakan", "peledakan");

View File

@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
@ -44,8 +45,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("abbandonata");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "abbandonata", "abbandonata");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.lv;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("tirgiem");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT,
LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "tirgiem", "tirgiem");

View File

@ -2,10 +2,7 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@ -47,12 +44,11 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(new LowerCaseFilterMock(
new KeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
Set<String> jdkSet = new HashSet<String>();
jdkSet.add("LuceneFox");
CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
assertTokenStreamContents(new LowerCaseFilterMock(
new KeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), jdkSet)), output);
Set<?> set2 = set;
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), mixedCaseSet)), output);
CharArraySet set2 = set;
assertTokenStreamContents(new LowerCaseFilterMock(
new KeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
@ -64,8 +60,8 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
new KeywordMarkerFilter(
new KeywordMarkerFilter(
new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
new HashSet<String>(Arrays.asList("Birds", "Houses"))),
new HashSet<String>(Arrays.asList("Dogs", "Trees"))));
new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
}

View File

@ -2,12 +2,11 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -33,7 +32,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
// lets make booked stem to books
// the override filter will convert "booked" to "books",
// but also mark it with KeywordAttribute so Porter will not change it.
Map<String,String> dictionary = new HashMap<String,String>();
CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
dictionary.put("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
TokenStream stream = new PorterStemFilter(

View File

@ -17,7 +17,6 @@ package org.apache.lucene.analysis.nl;
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -150,6 +149,26 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}
/**
* Checks that the default stem overrides are used
* even if you use a non-default ctor: the analyzer is built
* with an explicit (empty) stopword set, and "fiets" is
* still expected to come out unchanged.
*/
public void testStemOverrides() throws IOException {
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
checkOneTerm(a, "fiets", "fiets");
}
/**
* Prior to 3.6, the default stem overrides were confusingly
* not applied if you specified your own stoplist: with
* Version.LUCENE_35 and an explicit stopword set, "fiets"
* is expected to be stemmed to "fiet".
* @deprecated (3.6) Remove this test in Lucene 5.0
*/
@Deprecated
public void testBuggyStemOverrides() throws IOException {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_35, CharArraySet.EMPTY_SET);
checkOneTerm(a, "fiets", "fiet");
}
/**
* Prior to 3.1, this analyzer had no lowercase filter.
* stopwords were case sensitive. Preserve this for back compat.

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.no;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("havnedistriktene");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false);
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pt;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("quilométricas");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "quilométricas", "quilométricas");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ro;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("absenţa");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false);
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "absenţa", "absenţa");

View File

@ -18,12 +18,10 @@ package org.apache.lucene.analysis.sv;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -44,8 +42,7 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("jaktkarlarne");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false);
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.tr;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("ağacı");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false);
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "ağacı", "ağacı");

View File

@ -39,13 +39,13 @@ public class KuromojiAnalyzer extends StopwordAnalyzerBase {
this(matchVersion, new Segmenter(), DefaultSetHolder.DEFAULT_STOP_SET, DefaultSetHolder.DEFAULT_STOP_TAGS);
}
public KuromojiAnalyzer(Version matchVersion, Segmenter segmenter, Set<?> stopwords, Set<String> stoptags) {
public KuromojiAnalyzer(Version matchVersion, Segmenter segmenter, CharArraySet stopwords, Set<String> stoptags) {
super(matchVersion, stopwords);
this.segmenter = segmenter;
this.stoptags = stoptags;
}
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -58,7 +58,7 @@ public class KuromojiAnalyzer extends StopwordAnalyzerBase {
* outer class accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static final Set<String> DEFAULT_STOP_TAGS;
static {

View File

@ -18,10 +18,7 @@
package org.apache.lucene.analysis.cn.smart;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collections;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@ -58,7 +55,7 @@ import org.apache.lucene.util.Version;
*/
public final class SmartChineseAnalyzer extends Analyzer {
private final Set<?> stopWords;
private final CharArraySet stopWords;
private static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
@ -120,7 +117,7 @@ public final class SmartChineseAnalyzer extends Analyzer {
*/
public SmartChineseAnalyzer(Version matchVersion, boolean useDefaultStopWords) {
stopWords = useDefaultStopWords ? DefaultSetHolder.DEFAULT_STOP_SET
: Collections.EMPTY_SET;
: CharArraySet.EMPTY_SET;
this.matchVersion = matchVersion;
}
@ -133,8 +130,8 @@ public final class SmartChineseAnalyzer extends Analyzer {
* </p>
* @param stopWords {@link CharArraySet} of stopwords to use.
*/
public SmartChineseAnalyzer(Version matchVersion, Set stopWords) {
this.stopWords = stopWords==null?Collections.EMPTY_SET:stopWords;
public SmartChineseAnalyzer(Version matchVersion, CharArraySet stopWords) {
this.stopWords = stopWords==null?CharArraySet.EMPTY_SET:stopWords;
this.matchVersion = matchVersion;
}
@ -147,7 +144,7 @@ public final class SmartChineseAnalyzer extends Analyzer {
// The porter stemming is too strict, this is not a bug, this is a feature:)
result = new PorterStemFilter(result);
if (!stopWords.isEmpty()) {
result = new StopFilter(matchVersion, result, stopWords, false);
result = new StopFilter(matchVersion, result, stopWords);
}
return new TokenStreamComponents(tokenizer, result);
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.pl;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -42,7 +41,7 @@ import org.egothor.stemmer.Trie;
* {@link Analyzer} for Polish.
*/
public final class PolishAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
private final CharArraySet stemExclusionSet;
private final Trie stemTable;
/** File containing default Polish stopwords. */
@ -55,7 +54,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
*/
public static Set<?> getDefaultStopSet(){
public static CharArraySet getDefaultStopSet(){
return DefaultsHolder.DEFAULT_STOP_SET;
}
@ -64,7 +63,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
* accesses the static final set the first time.
*/
private static class DefaultsHolder {
static final Set<?> DEFAULT_STOP_SET;
static final CharArraySet DEFAULT_STOP_SET;
static final Trie DEFAULT_TABLE;
static {
@ -100,7 +99,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
* @param stopwords a stopword set
*/
public PolishAnalyzer(Version matchVersion, Set<?> stopwords) {
public PolishAnalyzer(Version matchVersion, CharArraySet stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
@ -113,7 +112,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
* @param stopwords a stopword set
* @param stemExclusionSet a set of terms not to be stemmed
*/
public PolishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
public PolishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemTable = DefaultsHolder.DEFAULT_TABLE;
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(

View File

@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pl;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@ -43,8 +42,7 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("studenta");
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);;
Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT,
PolishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "studenta", "studenta");

View File

@ -93,15 +93,14 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements
.getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet, but
// the getStopWords() method returns a Set<?>, so we need to cast.
// StopFilterFactory holds the stop words in a CharArraySet
solrStopWords.put(fieldName,
(CharArraySet) ((StopFilterFactory) factory).getStopWords());
((StopFilterFactory) factory).getStopWords());
}
if (factory instanceof CommonGramsFilterFactory) {
solrStopWords.put(fieldName,
(CharArraySet) ((CommonGramsFilterFactory) factory)
((CommonGramsFilterFactory) factory)
.getCommonWords());
}
}

View File

@ -17,7 +17,6 @@
package org.apache.solr.analysis;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
@ -71,12 +70,12 @@ public class CommonGramsFilterFactory extends BaseTokenFilterFactory implements
return ignoreCase;
}
public Set<?> getCommonWords() {
public CharArraySet getCommonWords() {
return commonWords;
}
/**
 * Wraps {@code input} in a {@link CommonGramsFilter} built from this factory's
 * {@code luceneMatchVersion} and {@code commonWords}, and returns it.
 * The two constructor lines below are the two sides of this diff hunk: the
 * first also passes {@code ignoreCase}, the second no longer does.
 */
public CommonGramsFilter create(TokenStream input) {
// NOTE(review): with ignoreCase dropped here, case handling presumably lives in
// the commonWords set itself; confirm where commonWords is constructed.
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords, ignoreCase);
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
return commonGrams;
}
}

View File

@ -18,7 +18,6 @@ package org.apache.solr.analysis;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
@ -80,7 +79,7 @@ public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
return ignoreCase;
}
/**
 * Returns this factory's {@code commonWords} field.
 * The two signature lines below are the two sides of this diff hunk: the
 * declared return type changes from {@code Set<?>} to {@code CharArraySet}.
 */
public Set<?> getCommonWords() {
public CharArraySet getCommonWords() {
return commonWords;
}
@ -88,8 +87,7 @@ public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
* Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
*/
public CommonGramsQueryFilter create(TokenStream input) {
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords,
ignoreCase);
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
CommonGramsQueryFilter commonGramsQuery = new CommonGramsQueryFilter(
commonGrams);
return commonGramsQuery;

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import java.util.Map;
import java.util.Set;
import java.io.IOException;
/**
@ -81,13 +80,13 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
return ignoreCase;
}
/**
 * Returns this factory's {@code stopWords} field.
 * The two signature lines below are the two sides of this diff hunk: the
 * declared return type changes from {@code Set<?>} to {@code CharArraySet}.
 */
public Set<?> getStopWords() {
public CharArraySet getStopWords() {
return stopWords;
}
/**
 * Wraps {@code input} in a {@link StopFilter} built from this factory's
 * {@code luceneMatchVersion} and {@code stopWords}, applies the configured
 * {@code enablePositionIncrements} setting, and returns the filter.
 * The two constructor lines below are the two sides of this diff hunk: the
 * first also passes {@code ignoreCase}, the second no longer does.
 */
@Override
public TokenStream create(TokenStream input) {
// NOTE(review): with ignoreCase dropped here, case handling presumably lives in
// the stopWords set itself; confirm where stopWords is constructed.
StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords,ignoreCase);
StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
stopFilter.setEnablePositionIncrements(enablePositionIncrements);
return stopFilter;
}

View File

@ -20,11 +20,11 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.io.StringReader;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
@ -44,7 +44,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
args.put("ignoreCase", "true");
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getCommonWords();
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 2,
words.size() == 2);
@ -89,7 +89,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getCommonWords();
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue(words.contains("the"));
Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);

View File

@ -19,11 +19,11 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.io.StringReader;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
@ -43,7 +43,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
args.put("ignoreCase", "true");
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getCommonWords();
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 2,
words.size() == 2);
@ -88,7 +88,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getCommonWords();
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue(words.contains("the"));
Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);

View File

@ -16,10 +16,10 @@ package org.apache.solr.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
@ -38,7 +38,7 @@ public class TestKeepFilterFactory extends BaseTokenTestCase{
args.put("ignoreCase", "true");
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getWords();
CharArraySet words = factory.getWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);

View File

@ -17,10 +17,10 @@ package org.apache.solr.analysis;
*/
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
@ -39,7 +39,7 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
args.put("ignoreCase", "true");
factory.init(args);
factory.inform(loader);
Set<?> words = factory.getStopWords();
CharArraySet words = factory.getStopWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);