mirror of https://github.com/apache/lucene.git
LUCENE-2358: rename KeywordMarkerTokenFilter to KeywordMarkerFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac05e3a08a
commit
d4b3141029
|
@ -26,7 +26,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -118,7 +118,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* {@link ArabicStemFilter}.
|
||||
*
|
||||
* @param matchVersion
|
||||
|
@ -169,7 +169,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
|||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link ArabicLetterTokenizer} filtered with
|
||||
* {@link LowerCaseFilter}, {@link StopFilter},
|
||||
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerTokenFilter}
|
||||
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}
|
||||
* if a stem exclusion set is provided and {@link ArabicStemFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -182,7 +182,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
|||
// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
|
||||
result = new ArabicNormalizationFilter(result);
|
||||
if(!stemExclusionSet.isEmpty()) {
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
}
|
||||
return new TokenStreamComponents(source, new ArabicStemFilter(result));
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ar;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -29,10 +29,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter */
|
||||
* @see KeywordMarkerFilter */
|
||||
|
||||
public final class ArabicStemFilter extends TokenFilter {
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -109,7 +109,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words and a stem exclusion set.
|
||||
* If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerTokenFilter}
|
||||
* If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerFilter}
|
||||
* before {@link BulgarianStemFilter}.
|
||||
*/
|
||||
public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
||||
|
@ -126,7 +126,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link BulgarianStemFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -136,7 +136,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new BulgarianStemFilter(result);
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.bg;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* words.
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
*/
|
||||
|
|
|
@ -29,7 +29,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -209,7 +209,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new StandardFilter(result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(excltable != null && !excltable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.br;
|
|||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -30,10 +30,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* A {@link TokenFilter} that applies {@link BrazilianStemmer}.
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
*
|
||||
*/
|
||||
public final class BrazilianStemFilter extends TokenFilter {
|
||||
|
@ -63,7 +63,7 @@ public final class BrazilianStemFilter extends TokenFilter {
|
|||
*
|
||||
* @param in the source {@link TokenStream}
|
||||
* @param exclusiontable a set of terms that should be prevented from being stemmed.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public BrazilianStemFilter(TokenStream in, Set<?> exclusiontable) {
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cz;
|
|||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -227,7 +227,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
|
|||
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
|
||||
* a version is >= LUCENE_31 and a stem exclusion set is provided via
|
||||
* {@link #CzechAnalyzer(Version, Set, Set)} a
|
||||
* {@link KeywordMarkerTokenFilter} is added before
|
||||
* {@link KeywordMarkerFilter} is added before
|
||||
* {@link CzechStemFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -239,7 +239,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
|
|||
result = new StopFilter( matchVersion, result, stoptable);
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||
if(!this.stemExclusionTable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionTable);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionTable);
|
||||
result = new CzechStemFilter(result);
|
||||
}
|
||||
return new TokenStreamComponents(source, result);
|
||||
|
|
|
@ -2,7 +2,7 @@ package org.apache.lucene.analysis.cz;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -29,12 +29,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* A {@link TokenFilter} that applies {@link CzechStemmer} to stem Czech words.
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* <p><b>NOTE</b>: Input is expected to be in lowercase,
|
||||
* but with diacritical marks</p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
*/
|
||||
public final class CzechStemFilter extends TokenFilter {
|
||||
private final CzechStemmer stemmer;
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new DanishStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -230,7 +230,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
|
|||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided, and {@link SnowballFilter}
|
||||
*/
|
||||
@Override
|
||||
|
@ -240,7 +240,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
|
|||
TokenStream result = new StandardFilter(source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter( matchVersion, result, stopwords);
|
||||
result = new KeywordMarkerTokenFilter(result, exclusionSet);
|
||||
result = new KeywordMarkerFilter(result, exclusionSet);
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_31))
|
||||
result = new SnowballFilter(result, new German2Stemmer());
|
||||
else
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.de;
|
|||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -35,10 +35,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* </p>
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
*/
|
||||
public final class GermanStemFilter extends TokenFilter
|
||||
{
|
||||
|
@ -65,7 +65,7 @@ public final class GermanStemFilter extends TokenFilter
|
|||
|
||||
/**
|
||||
* Builds a GermanStemFilter that uses an exclusion table.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public GermanStemFilter( TokenStream in, Set<?> exclusionSet )
|
||||
|
@ -107,7 +107,7 @@ public final class GermanStemFilter extends TokenFilter
|
|||
|
||||
/**
|
||||
* Set an alternative exclusion list for this filter.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public void setExclusionSet( Set<?> exclusionSet )
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.PorterStemFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
|
@ -75,7 +75,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -97,7 +97,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link PorterStemFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -108,7 +108,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new PorterStemFilter(result);
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new SpanishStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new FinnishStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.fr;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -232,7 +232,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
|||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link ElisionFilter},
|
||||
* {@link LowerCaseFilter}, {@link StopFilter},
|
||||
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided, and {@link SnowballFilter}
|
||||
*/
|
||||
@Override
|
||||
|
@ -245,7 +245,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!excltable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
} else {
|
||||
|
@ -253,7 +253,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
|||
TokenStream result = new StandardFilter(source);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!excltable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
result = new FrenchStemFilter(result);
|
||||
// Convert to lowercase after stemming!
|
||||
return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
|
||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.analysis.fr;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
|
@ -37,10 +37,10 @@ import java.util.Set;
|
|||
* </p>
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
* @deprecated Use {@link SnowballFilter} with
|
||||
* {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
|
||||
* same functionality. This filter will be removed in Lucene 4.0
|
||||
|
@ -68,7 +68,7 @@ public final class FrenchStemFilter extends TokenFilter {
|
|||
*
|
||||
* @param in the {@link TokenStream} to filter
|
||||
* @param exclusiontable a set of terms not to be stemmed
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated // TODO remove in 3.2
|
||||
public FrenchStemFilter( TokenStream in, Set<?> exclusiontable ) {
|
||||
|
@ -106,7 +106,7 @@ public final class FrenchStemFilter extends TokenFilter {
|
|||
}
|
||||
/**
|
||||
* Set an alternative exclusion list for this filter.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated // TODO remove in 3.2
|
||||
public void setExclusionTable( Map<?,?> exclusiontable ) {
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -112,7 +112,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
|
|||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link IndicTokenizer} filtered with
|
||||
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
|
||||
* {@link HindiNormalizationFilter}, {@link KeywordMarkerTokenFilter}
|
||||
* {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}
|
||||
* if a stem exclusion set is provided, {@link HindiStemFilter}, and
|
||||
* Hindi Stop words
|
||||
*/
|
||||
|
@ -122,7 +122,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
|
|||
final Tokenizer source = new IndicTokenizer(matchVersion, reader);
|
||||
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
||||
if (!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new IndicNormalizationFilter(result);
|
||||
result = new HindiNormalizationFilter(result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.hi;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
|
@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* <p>
|
||||
* In some cases the normalization may cause unrelated terms to conflate, so
|
||||
* to prevent terms from being normalized use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see HindiNormalizer
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new HungarianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new ItalianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.nl;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
|
@ -238,7 +238,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
|||
*
|
||||
* @return A {@link TokenStream} built from a {@link StandardTokenizer}
|
||||
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
|
||||
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
|
||||
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is provided,
|
||||
* {@link StemmerOverrideFilter}, and {@link SnowballFilter}
|
||||
*/
|
||||
@Override
|
||||
|
@ -250,7 +250,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stoptable);
|
||||
if (!excltable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
if (!stemdict.isEmpty())
|
||||
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
|
||||
|
@ -260,7 +260,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
|||
TokenStream result = new StandardFilter(source);
|
||||
result = new StopFilter(matchVersion, result, stoptable);
|
||||
if (!excltable.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
||||
result = new KeywordMarkerFilter(result, excltable);
|
||||
result = new DutchStemFilter(result, stemdict);
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.HashSet;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
|
@ -39,10 +39,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* </p>
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
* @deprecated Use {@link SnowballFilter} with
|
||||
* {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
|
||||
* same functionality. This filter will be removed in Lucene 4.0
|
||||
|
@ -67,7 +67,7 @@ public final class DutchStemFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* Builds a DutchStemFilter that uses an exclusion table.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable) {
|
||||
|
@ -85,7 +85,7 @@ public final class DutchStemFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable, Map<?,?> stemdictionary) {
|
||||
|
@ -125,7 +125,7 @@ public final class DutchStemFilter extends TokenFilter {
|
|||
|
||||
/**
|
||||
* Set an alternative exclusion list for this filter.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public void setExclusionTable(HashSet<?> exclusiontable) {
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new NorwegianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new PortugueseStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -95,7 +95,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -117,7 +117,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -128,7 +128,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new RomanianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
|
|||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -167,7 +167,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
|||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from a {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided, and {@link SnowballFilter}
|
||||
*/
|
||||
@Override
|
||||
|
@ -178,7 +178,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
|||
TokenStream result = new StandardFilter(source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||
result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
|
@ -186,7 +186,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
|||
final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
|
||||
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
|
||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||
result, stemExclusionSet);
|
||||
return new TokenStreamComponents(source, new RussianStemFilter(result));
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.analysis.ru;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||
import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -37,10 +37,10 @@ import java.io.IOException;
|
|||
* </p>
|
||||
* <p>
|
||||
* To prevent terms from being stemmed use an instance of
|
||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
||||
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||
* </p>
|
||||
* @see KeywordMarkerTokenFilter
|
||||
* @see KeywordMarkerFilter
|
||||
* @deprecated Use {@link SnowballFilter} with
|
||||
* {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
|
||||
* same functionality. This filter will be removed in Lucene 4.0
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
|
@ -91,7 +91,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -113,7 +113,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -124,7 +124,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new SwedishStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -94,7 +94,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
|
@ -116,7 +116,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
|||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
|
||||
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
|
||||
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
|
||||
* exclusion set is provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
|
@ -127,7 +127,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
|||
result = new TurkishLowerCaseFilter(result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new TurkishStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
|
||||
/**
|
||||
* Test the Arabic Normalization Filter
|
||||
|
@ -119,7 +119,7 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
|
|||
set.add("ساهدهات");
|
||||
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
|
||||
|
||||
ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerTokenFilter(tokenStream, set));
|
||||
ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerFilter(tokenStream, set));
|
||||
assertTokenStreamContents(filter, new String[]{"ساهدهات"});
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -219,7 +219,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
|
|||
new StringReader("строевете строеве"));
|
||||
|
||||
BulgarianStemFilter filter = new BulgarianStemFilter(
|
||||
new KeywordMarkerTokenFilter(tokenStream, set));
|
||||
new KeywordMarkerFilter(tokenStream, set));
|
||||
assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.io.StringReader;
|
|||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
||||
|
||||
/**
|
||||
|
@ -152,7 +152,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
|||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("Brasília");
|
||||
BrazilianStemFilter filter = new BrazilianStemFilter(
|
||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"Brasília Brasilia")), set));
|
||||
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
|||
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set1.add("Brasilia");
|
||||
BrazilianStemFilter filter = new BrazilianStemFilter(
|
||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"Brasília Brasilia")), set), set1);
|
||||
assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
|
||||
/**
|
||||
|
@ -277,7 +277,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
|
|||
public void testWithKeywordAttribute() throws IOException {
|
||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("hole");
|
||||
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
|
||||
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
|
||||
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.io.StringReader;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -48,7 +48,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
|||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("fischen");
|
||||
GermanStemFilter filter = new GermanStemFilter(
|
||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"Fischen Trinken")), set));
|
||||
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
|||
set1.add("trinken");
|
||||
set1.add("fischen");
|
||||
GermanStemFilter filter = new GermanStemFilter(
|
||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"Fischen Trinken")), set));
|
||||
filter.setExclusionSet(set1);
|
||||
assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
|
||||
|
|
|
@ -31,14 +31,14 @@ import org.apache.lucene.util.Version;
|
|||
*
|
||||
* @see KeywordAttribute
|
||||
*/
|
||||
public final class KeywordMarkerTokenFilter extends TokenFilter {
|
||||
public final class KeywordMarkerFilter extends TokenFilter {
|
||||
|
||||
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final CharArraySet keywordSet;
|
||||
|
||||
/**
|
||||
* Create a new KeywordMarkerTokenFilter, that marks the current token as a
|
||||
* Create a new KeywordMarkerFilter, that marks the current token as a
|
||||
* keyword if the tokens term buffer is contained in the given set via the
|
||||
* {@link KeywordAttribute}.
|
||||
*
|
||||
|
@ -47,14 +47,14 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
|
|||
* @param keywordSet
|
||||
* the keywords set to lookup the current termbuffer
|
||||
*/
|
||||
public KeywordMarkerTokenFilter(final TokenStream in,
|
||||
public KeywordMarkerFilter(final TokenStream in,
|
||||
final CharArraySet keywordSet) {
|
||||
super(in);
|
||||
this.keywordSet = keywordSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new KeywordMarkerTokenFilter, that marks the current token as a
|
||||
* Create a new KeywordMarkerFilter, that marks the current token as a
|
||||
* keyword if the tokens term buffer is contained in the given set via the
|
||||
* {@link KeywordAttribute}.
|
||||
*
|
||||
|
@ -63,7 +63,7 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
|
|||
* @param keywordSet
|
||||
* the keywords set to lookup the current termbuffer
|
||||
*/
|
||||
public KeywordMarkerTokenFilter(final TokenStream in, final Set<?> keywordSet) {
|
||||
public KeywordMarkerFilter(final TokenStream in, final Set<?> keywordSet) {
|
||||
this(in, keywordSet instanceof CharArraySet ? (CharArraySet) keywordSet
|
||||
: CharArraySet.copy(Version.LUCENE_31, keywordSet));
|
||||
}
|
|
@ -27,9 +27,9 @@ import org.junit.Test;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Testcase for {@link KeywordMarkerTokenFilter}
|
||||
* Testcase for {@link KeywordMarkerFilter}
|
||||
*/
|
||||
public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
|
||||
public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
@Test
|
||||
public void testIncrementToken() throws IOException {
|
||||
|
@ -38,16 +38,16 @@ public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
|
|||
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
|
||||
"jumps" };
|
||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"The quIck browN LuceneFox Jumps")), set)), output);
|
||||
Set<String> jdkSet = new HashSet<String>();
|
||||
jdkSet.add("LuceneFox");
|
||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"The quIck browN LuceneFox Jumps")), jdkSet)), output);
|
||||
Set<?> set2 = set;
|
||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"The quIck browN LuceneFox Jumps")), set2)), output);
|
||||
}
|
||||
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis;
|
|||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
|
@ -60,7 +59,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
|||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("yourselves");
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
|
||||
TokenStream filter = new PorterStemFilter(new KeywordMarkerTokenFilter(tokenizer, set));
|
||||
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
|
||||
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
|
@ -50,7 +50,7 @@ public class EnglishPorterFilterFactory extends BaseTokenFilterFactory implement
|
|||
|
||||
public TokenFilter create(TokenStream input) {
|
||||
if (protectedWords != null)
|
||||
input = new KeywordMarkerTokenFilter(input, protectedWords);
|
||||
input = new KeywordMarkerFilter(input, protectedWords);
|
||||
return new SnowballFilter(input, new org.tartarus.snowball.ext.EnglishStemmer());
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ package org.apache.solr.analysis;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
@ -26,7 +26,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Factory for {@link KeywordMarkerTokenFilter}
|
||||
* Factory for {@link KeywordMarkerFilter}
|
||||
*/
|
||||
public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
|
@ -50,6 +50,6 @@ public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implement
|
|||
}
|
||||
|
||||
public TokenStream create(TokenStream input) {
|
||||
return protectedWords == null ? input : new KeywordMarkerTokenFilter(input, protectedWords);
|
||||
return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.solr.analysis;
|
|||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
@ -77,7 +77,7 @@ public class SnowballPorterFilterFactory extends BaseTokenFilterFactory implemen
|
|||
}
|
||||
|
||||
if (protectedWords != null)
|
||||
input = new KeywordMarkerTokenFilter(input, protectedWords);
|
||||
input = new KeywordMarkerFilter(input, protectedWords);
|
||||
return new SnowballFilter(input, program);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue