mirror of https://github.com/apache/lucene.git
LUCENE-2358: rename KeywordMarkerTokenFilter to KeywordMarkerFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac05e3a08a
commit
d4b3141029
|
@ -26,7 +26,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -118,7 +118,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* {@link ArabicStemFilter}.
|
* {@link ArabicStemFilter}.
|
||||||
*
|
*
|
||||||
* @param matchVersion
|
* @param matchVersion
|
||||||
|
@ -169,7 +169,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
||||||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link ArabicLetterTokenizer} filtered with
|
* built from an {@link ArabicLetterTokenizer} filtered with
|
||||||
* {@link LowerCaseFilter}, {@link StopFilter},
|
* {@link LowerCaseFilter}, {@link StopFilter},
|
||||||
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerTokenFilter}
|
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}
|
||||||
* if a stem exclusion set is provided and {@link ArabicStemFilter}.
|
* if a stem exclusion set is provided and {@link ArabicStemFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -182,7 +182,7 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
||||||
// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
|
// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
|
||||||
result = new ArabicNormalizationFilter(result);
|
result = new ArabicNormalizationFilter(result);
|
||||||
if(!stemExclusionSet.isEmpty()) {
|
if(!stemExclusionSet.isEmpty()) {
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
}
|
}
|
||||||
return new TokenStreamComponents(source, new ArabicStemFilter(result));
|
return new TokenStreamComponents(source, new ArabicStemFilter(result));
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ar;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -29,10 +29,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.
|
* A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter */
|
* @see KeywordMarkerFilter */
|
||||||
|
|
||||||
public final class ArabicStemFilter extends TokenFilter {
|
public final class ArabicStemFilter extends TokenFilter {
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -109,7 +109,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words and a stem exclusion set.
|
* Builds an analyzer with the given stop words and a stem exclusion set.
|
||||||
* If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerTokenFilter}
|
* If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerFilter}
|
||||||
* before {@link BulgarianStemFilter}.
|
* before {@link BulgarianStemFilter}.
|
||||||
*/
|
*/
|
||||||
public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
||||||
|
@ -126,7 +126,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link BulgarianStemFilter}.
|
* provided and {@link BulgarianStemFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -136,7 +136,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new BulgarianStemFilter(result);
|
result = new BulgarianStemFilter(result);
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.bg;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* words.
|
* words.
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -29,7 +29,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -209,7 +209,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new StandardFilter(result);
|
result = new StandardFilter(result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(excltable != null && !excltable.isEmpty())
|
if(excltable != null && !excltable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
result = new KeywordMarkerFilter(result, excltable);
|
||||||
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
|
return new TokenStreamComponents(source, new BrazilianStemFilter(result));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.br;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -30,10 +30,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* A {@link TokenFilter} that applies {@link BrazilianStemmer}.
|
* A {@link TokenFilter} that applies {@link BrazilianStemmer}.
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public final class BrazilianStemFilter extends TokenFilter {
|
public final class BrazilianStemFilter extends TokenFilter {
|
||||||
|
@ -63,7 +63,7 @@ public final class BrazilianStemFilter extends TokenFilter {
|
||||||
*
|
*
|
||||||
* @param in the source {@link TokenStream}
|
* @param in the source {@link TokenStream}
|
||||||
* @param exclusiontable a set of terms that should be prevented from being stemmed.
|
* @param exclusiontable a set of terms that should be prevented from being stemmed.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public BrazilianStemFilter(TokenStream in, Set<?> exclusiontable) {
|
public BrazilianStemFilter(TokenStream in, Set<?> exclusiontable) {
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cz;
|
||||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -227,7 +227,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
|
||||||
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
|
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
|
||||||
* a version is >= LUCENE_31 and a stem exclusion set is provided via
|
* a version is >= LUCENE_31 and a stem exclusion set is provided via
|
||||||
* {@link #CzechAnalyzer(Version, Set, Set)} a
|
* {@link #CzechAnalyzer(Version, Set, Set)} a
|
||||||
* {@link KeywordMarkerTokenFilter} is added before
|
* {@link KeywordMarkerFilter} is added before
|
||||||
* {@link CzechStemFilter}.
|
* {@link CzechStemFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -239,7 +239,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
|
||||||
result = new StopFilter( matchVersion, result, stoptable);
|
result = new StopFilter( matchVersion, result, stoptable);
|
||||||
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
|
||||||
if(!this.stemExclusionTable.isEmpty())
|
if(!this.stemExclusionTable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionTable);
|
result = new KeywordMarkerFilter(result, stemExclusionTable);
|
||||||
result = new CzechStemFilter(result);
|
result = new CzechStemFilter(result);
|
||||||
}
|
}
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
|
|
|
@ -2,7 +2,7 @@ package org.apache.lucene.analysis.cz;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -29,12 +29,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* A {@link TokenFilter} that applies {@link CzechStemmer} to stem Czech words.
|
* A {@link TokenFilter} that applies {@link CzechStemmer} to stem Czech words.
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* <p><b>NOTE</b>: Input is expected to be in lowercase,
|
* <p><b>NOTE</b>: Input is expected to be in lowercase,
|
||||||
* but with diacritical marks</p>
|
* but with diacritical marks</p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
*/
|
*/
|
||||||
public final class CzechStemFilter extends TokenFilter {
|
public final class CzechStemFilter extends TokenFilter {
|
||||||
private final CzechStemmer stemmer;
|
private final CzechStemmer stemmer;
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new DanishStemmer());
|
result = new SnowballFilter(result, new DanishStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -230,7 +230,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
|
||||||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided, and {@link SnowballFilter}
|
* provided, and {@link SnowballFilter}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -240,7 +240,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
|
||||||
TokenStream result = new StandardFilter(source);
|
TokenStream result = new StandardFilter(source);
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter( matchVersion, result, stopwords);
|
result = new StopFilter( matchVersion, result, stopwords);
|
||||||
result = new KeywordMarkerTokenFilter(result, exclusionSet);
|
result = new KeywordMarkerFilter(result, exclusionSet);
|
||||||
if (matchVersion.onOrAfter(Version.LUCENE_31))
|
if (matchVersion.onOrAfter(Version.LUCENE_31))
|
||||||
result = new SnowballFilter(result, new German2Stemmer());
|
result = new SnowballFilter(result, new German2Stemmer());
|
||||||
else
|
else
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.de;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -35,10 +35,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
*/
|
*/
|
||||||
public final class GermanStemFilter extends TokenFilter
|
public final class GermanStemFilter extends TokenFilter
|
||||||
{
|
{
|
||||||
|
@ -65,7 +65,7 @@ public final class GermanStemFilter extends TokenFilter
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds a GermanStemFilter that uses an exclusion table.
|
* Builds a GermanStemFilter that uses an exclusion table.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public GermanStemFilter( TokenStream in, Set<?> exclusionSet )
|
public GermanStemFilter( TokenStream in, Set<?> exclusionSet )
|
||||||
|
@ -107,7 +107,7 @@ public final class GermanStemFilter extends TokenFilter
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set an alternative exclusion list for this filter.
|
* Set an alternative exclusion list for this filter.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public void setExclusionSet( Set<?> exclusionSet )
|
public void setExclusionSet( Set<?> exclusionSet )
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.PorterStemFilter;
|
import org.apache.lucene.analysis.PorterStemFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
|
@ -75,7 +75,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -97,7 +97,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link PorterStemFilter}.
|
* provided and {@link PorterStemFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -108,7 +108,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new PorterStemFilter(result);
|
result = new PorterStemFilter(result);
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new SpanishStemmer());
|
result = new SnowballFilter(result, new SpanishStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new FinnishStemmer());
|
result = new SnowballFilter(result, new FinnishStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.fr;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -232,7 +232,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link ElisionFilter},
|
* {@link StandardFilter}, {@link ElisionFilter},
|
||||||
* {@link LowerCaseFilter}, {@link StopFilter},
|
* {@link LowerCaseFilter}, {@link StopFilter},
|
||||||
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided, and {@link SnowballFilter}
|
* provided, and {@link SnowballFilter}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -245,7 +245,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!excltable.isEmpty())
|
if(!excltable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
result = new KeywordMarkerFilter(result, excltable);
|
||||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
|
result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
} else {
|
} else {
|
||||||
|
@ -253,7 +253,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
||||||
TokenStream result = new StandardFilter(source);
|
TokenStream result = new StandardFilter(source);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!excltable.isEmpty())
|
if(!excltable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
result = new KeywordMarkerFilter(result, excltable);
|
||||||
result = new FrenchStemFilter(result);
|
result = new FrenchStemFilter(result);
|
||||||
// Convert to lowercase after stemming!
|
// Convert to lowercase after stemming!
|
||||||
return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
|
return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
|
||||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.analysis.fr;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
|
@ -37,10 +37,10 @@ import java.util.Set;
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
* @deprecated Use {@link SnowballFilter} with
|
* @deprecated Use {@link SnowballFilter} with
|
||||||
* {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
|
* {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
|
||||||
* same functionality. This filter will be removed in Lucene 4.0
|
* same functionality. This filter will be removed in Lucene 4.0
|
||||||
|
@ -68,7 +68,7 @@ public final class FrenchStemFilter extends TokenFilter {
|
||||||
*
|
*
|
||||||
* @param in the {@link TokenStream} to filter
|
* @param in the {@link TokenStream} to filter
|
||||||
* @param exclusiontable a set of terms not to be stemmed
|
* @param exclusiontable a set of terms not to be stemmed
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated // TODO remove in 3.2
|
@Deprecated // TODO remove in 3.2
|
||||||
public FrenchStemFilter( TokenStream in, Set<?> exclusiontable ) {
|
public FrenchStemFilter( TokenStream in, Set<?> exclusiontable ) {
|
||||||
|
@ -106,7 +106,7 @@ public final class FrenchStemFilter extends TokenFilter {
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Set an alternative exclusion list for this filter.
|
* Set an alternative exclusion list for this filter.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated // TODO remove in 3.2
|
@Deprecated // TODO remove in 3.2
|
||||||
public void setExclusionTable( Map<?,?> exclusiontable ) {
|
public void setExclusionTable( Map<?,?> exclusiontable ) {
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -112,7 +112,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
|
||||||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link IndicTokenizer} filtered with
|
* built from a {@link IndicTokenizer} filtered with
|
||||||
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
|
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
|
||||||
* {@link HindiNormalizationFilter}, {@link KeywordMarkerTokenFilter}
|
* {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}
|
||||||
* if a stem exclusion set is provided, {@link HindiStemFilter}, and
|
* if a stem exclusion set is provided, {@link HindiStemFilter}, and
|
||||||
* Hindi Stop words
|
* Hindi Stop words
|
||||||
*/
|
*/
|
||||||
|
@ -122,7 +122,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
|
||||||
final Tokenizer source = new IndicTokenizer(matchVersion, reader);
|
final Tokenizer source = new IndicTokenizer(matchVersion, reader);
|
||||||
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
||||||
if (!stemExclusionSet.isEmpty())
|
if (!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new IndicNormalizationFilter(result);
|
result = new IndicNormalizationFilter(result);
|
||||||
result = new HindiNormalizationFilter(result);
|
result = new HindiNormalizationFilter(result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.hi;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||||
|
@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* <p>
|
* <p>
|
||||||
* In some cases the normalization may cause unrelated terms to conflate, so
|
* In some cases the normalization may cause unrelated terms to conflate, so
|
||||||
* to prevent terms from being normalized use an instance of
|
* to prevent terms from being normalized use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see HindiNormalizer
|
* @see HindiNormalizer
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new HungarianStemmer());
|
result = new SnowballFilter(result, new HungarianStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new ItalianStemmer());
|
result = new SnowballFilter(result, new ItalianStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.nl;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
|
@ -238,7 +238,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
||||||
*
|
*
|
||||||
* @return A {@link TokenStream} built from a {@link StandardTokenizer}
|
* @return A {@link TokenStream} built from a {@link StandardTokenizer}
|
||||||
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
|
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
|
||||||
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
|
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is provided,
|
||||||
* {@link StemmerOverrideFilter}, and {@link SnowballFilter}
|
* {@link StemmerOverrideFilter}, and {@link SnowballFilter}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -250,7 +250,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stoptable);
|
result = new StopFilter(matchVersion, result, stoptable);
|
||||||
if (!excltable.isEmpty())
|
if (!excltable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
result = new KeywordMarkerFilter(result, excltable);
|
||||||
if (!stemdict.isEmpty())
|
if (!stemdict.isEmpty())
|
||||||
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
|
result = new StemmerOverrideFilter(matchVersion, result, stemdict);
|
||||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
|
result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
|
||||||
|
@ -260,7 +260,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
|
||||||
TokenStream result = new StandardFilter(source);
|
TokenStream result = new StandardFilter(source);
|
||||||
result = new StopFilter(matchVersion, result, stoptable);
|
result = new StopFilter(matchVersion, result, stoptable);
|
||||||
if (!excltable.isEmpty())
|
if (!excltable.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, excltable);
|
result = new KeywordMarkerFilter(result, excltable);
|
||||||
result = new DutchStemFilter(result, stemdict);
|
result = new DutchStemFilter(result, stemdict);
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
|
@ -39,10 +39,10 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
* @deprecated Use {@link SnowballFilter} with
|
* @deprecated Use {@link SnowballFilter} with
|
||||||
* {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
|
* {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
|
||||||
* same functionality. This filter will be removed in Lucene 4.0
|
* same functionality. This filter will be removed in Lucene 4.0
|
||||||
|
@ -67,7 +67,7 @@ public final class DutchStemFilter extends TokenFilter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds a DutchStemFilter that uses an exclusion table.
|
* Builds a DutchStemFilter that uses an exclusion table.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable) {
|
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable) {
|
||||||
|
@ -85,7 +85,7 @@ public final class DutchStemFilter extends TokenFilter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
|
* @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable, Map<?,?> stemdictionary) {
|
public DutchStemFilter(TokenStream _in, Set<?> exclusiontable, Map<?,?> stemdictionary) {
|
||||||
|
@ -125,7 +125,7 @@ public final class DutchStemFilter extends TokenFilter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set an alternative exclusion list for this filter.
|
* Set an alternative exclusion list for this filter.
|
||||||
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerTokenFilter} instead.
|
* @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public void setExclusionTable(HashSet<?> exclusiontable) {
|
public void setExclusionTable(HashSet<?> exclusiontable) {
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new NorwegianStemmer());
|
result = new SnowballFilter(result, new NorwegianStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new PortugueseStemmer());
|
result = new SnowballFilter(result, new PortugueseStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -95,7 +95,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -117,7 +117,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -128,7 +128,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new RomanianStemmer());
|
result = new SnowballFilter(result, new RomanianStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -167,7 +167,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
||||||
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from a {@link StandardTokenizer} filtered with
|
* built from a {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided, and {@link SnowballFilter}
|
* provided, and {@link SnowballFilter}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -178,7 +178,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
||||||
TokenStream result = new StandardFilter(source);
|
TokenStream result = new StandardFilter(source);
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
|
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||||
result, stemExclusionSet);
|
result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
|
@ -186,7 +186,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
|
||||||
final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
|
final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
|
||||||
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
TokenStream result = new LowerCaseFilter(matchVersion, source);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
|
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
|
||||||
result, stemExclusionSet);
|
result, stemExclusionSet);
|
||||||
return new TokenStreamComponents(source, new RussianStemFilter(result));
|
return new TokenStreamComponents(source, new RussianStemFilter(result));
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.analysis.ru;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
|
import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
|
import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -37,10 +37,10 @@ import java.io.IOException;
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* To prevent terms from being stemmed use an instance of
|
* To prevent terms from being stemmed use an instance of
|
||||||
* {@link KeywordMarkerTokenFilter} or a custom {@link TokenFilter} that sets
|
* {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
|
||||||
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
* the {@link KeywordAttribute} before this {@link TokenStream}.
|
||||||
* </p>
|
* </p>
|
||||||
* @see KeywordMarkerTokenFilter
|
* @see KeywordMarkerFilter
|
||||||
* @deprecated Use {@link SnowballFilter} with
|
* @deprecated Use {@link SnowballFilter} with
|
||||||
* {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
|
* {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
|
||||||
* same functionality. This filter will be removed in Lucene 4.0
|
* same functionality. This filter will be removed in Lucene 4.0
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
|
@ -91,7 +91,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -113,7 +113,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||||
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
|
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||||
* provided and {@link SnowballFilter}.
|
* provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,7 +124,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new LowerCaseFilter(matchVersion, result);
|
result = new LowerCaseFilter(matchVersion, result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new SwedishStemmer());
|
result = new SnowballFilter(result, new SwedishStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.StopFilter;
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -94,7 +94,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||||
* provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
|
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||||
* stemming.
|
* stemming.
|
||||||
*
|
*
|
||||||
* @param matchVersion lucene compatibility version
|
* @param matchVersion lucene compatibility version
|
||||||
|
@ -116,7 +116,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
||||||
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
|
||||||
* built from an {@link StandardTokenizer} filtered with
|
* built from an {@link StandardTokenizer} filtered with
|
||||||
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
|
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
|
||||||
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
|
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
|
||||||
* exclusion set is provided and {@link SnowballFilter}.
|
* exclusion set is provided and {@link SnowballFilter}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
@ -127,7 +127,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
||||||
result = new TurkishLowerCaseFilter(result);
|
result = new TurkishLowerCaseFilter(result);
|
||||||
result = new StopFilter(matchVersion, result, stopwords);
|
result = new StopFilter(matchVersion, result, stopwords);
|
||||||
if(!stemExclusionSet.isEmpty())
|
if(!stemExclusionSet.isEmpty())
|
||||||
result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
|
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||||
result = new SnowballFilter(result, new TurkishStemmer());
|
result = new SnowballFilter(result, new TurkishStemmer());
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the Arabic Normalization Filter
|
* Test the Arabic Normalization Filter
|
||||||
|
@ -119,7 +119,7 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
|
||||||
set.add("ساهدهات");
|
set.add("ساهدهات");
|
||||||
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
|
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
|
||||||
|
|
||||||
ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerTokenFilter(tokenStream, set));
|
ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerFilter(tokenStream, set));
|
||||||
assertTokenStreamContents(filter, new String[]{"ساهدهات"});
|
assertTokenStreamContents(filter, new String[]{"ساهدهات"});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -219,7 +219,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
|
||||||
new StringReader("строевете строеве"));
|
new StringReader("строевете строеве"));
|
||||||
|
|
||||||
BulgarianStemFilter filter = new BulgarianStemFilter(
|
BulgarianStemFilter filter = new BulgarianStemFilter(
|
||||||
new KeywordMarkerTokenFilter(tokenStream, set));
|
new KeywordMarkerFilter(tokenStream, set));
|
||||||
assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
|
assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.io.StringReader;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -152,7 +152,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
||||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||||
set.add("Brasília");
|
set.add("Brasília");
|
||||||
BrazilianStemFilter filter = new BrazilianStemFilter(
|
BrazilianStemFilter filter = new BrazilianStemFilter(
|
||||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"Brasília Brasilia")), set));
|
"Brasília Brasilia")), set));
|
||||||
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
|
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
||||||
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||||
set1.add("Brasilia");
|
set1.add("Brasilia");
|
||||||
BrazilianStemFilter filter = new BrazilianStemFilter(
|
BrazilianStemFilter filter = new BrazilianStemFilter(
|
||||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"Brasília Brasilia")), set), set1);
|
"Brasília Brasilia")), set), set1);
|
||||||
assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
|
assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -277,7 +277,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
|
||||||
public void testWithKeywordAttribute() throws IOException {
|
public void testWithKeywordAttribute() throws IOException {
|
||||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||||
set.add("hole");
|
set.add("hole");
|
||||||
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
|
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
|
||||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
|
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
|
||||||
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
|
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.io.StringReader;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
||||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||||
set.add("fischen");
|
set.add("fischen");
|
||||||
GermanStemFilter filter = new GermanStemFilter(
|
GermanStemFilter filter = new GermanStemFilter(
|
||||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"Fischen Trinken")), set));
|
"Fischen Trinken")), set));
|
||||||
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
|
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
|
||||||
}
|
}
|
||||||
|
@ -60,7 +60,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
|
||||||
set1.add("trinken");
|
set1.add("trinken");
|
||||||
set1.add("fischen");
|
set1.add("fischen");
|
||||||
GermanStemFilter filter = new GermanStemFilter(
|
GermanStemFilter filter = new GermanStemFilter(
|
||||||
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"Fischen Trinken")), set));
|
"Fischen Trinken")), set));
|
||||||
filter.setExclusionSet(set1);
|
filter.setExclusionSet(set1);
|
||||||
assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
|
assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
|
||||||
|
|
|
@ -31,14 +31,14 @@ import org.apache.lucene.util.Version;
|
||||||
*
|
*
|
||||||
* @see KeywordAttribute
|
* @see KeywordAttribute
|
||||||
*/
|
*/
|
||||||
public final class KeywordMarkerTokenFilter extends TokenFilter {
|
public final class KeywordMarkerFilter extends TokenFilter {
|
||||||
|
|
||||||
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
private final CharArraySet keywordSet;
|
private final CharArraySet keywordSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new KeywordMarkerTokenFilter, that marks the current token as a
|
* Create a new KeywordMarkerFilter, that marks the current token as a
|
||||||
* keyword if the tokens term buffer is contained in the given set via the
|
* keyword if the tokens term buffer is contained in the given set via the
|
||||||
* {@link KeywordAttribute}.
|
* {@link KeywordAttribute}.
|
||||||
*
|
*
|
||||||
|
@ -47,14 +47,14 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
|
||||||
* @param keywordSet
|
* @param keywordSet
|
||||||
* the keywords set to lookup the current termbuffer
|
* the keywords set to lookup the current termbuffer
|
||||||
*/
|
*/
|
||||||
public KeywordMarkerTokenFilter(final TokenStream in,
|
public KeywordMarkerFilter(final TokenStream in,
|
||||||
final CharArraySet keywordSet) {
|
final CharArraySet keywordSet) {
|
||||||
super(in);
|
super(in);
|
||||||
this.keywordSet = keywordSet;
|
this.keywordSet = keywordSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new KeywordMarkerTokenFilter, that marks the current token as a
|
* Create a new KeywordMarkerFilter, that marks the current token as a
|
||||||
* keyword if the tokens term buffer is contained in the given set via the
|
* keyword if the tokens term buffer is contained in the given set via the
|
||||||
* {@link KeywordAttribute}.
|
* {@link KeywordAttribute}.
|
||||||
*
|
*
|
||||||
|
@ -63,7 +63,7 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
|
||||||
* @param keywordSet
|
* @param keywordSet
|
||||||
* the keywords set to lookup the current termbuffer
|
* the keywords set to lookup the current termbuffer
|
||||||
*/
|
*/
|
||||||
public KeywordMarkerTokenFilter(final TokenStream in, final Set<?> keywordSet) {
|
public KeywordMarkerFilter(final TokenStream in, final Set<?> keywordSet) {
|
||||||
this(in, keywordSet instanceof CharArraySet ? (CharArraySet) keywordSet
|
this(in, keywordSet instanceof CharArraySet ? (CharArraySet) keywordSet
|
||||||
: CharArraySet.copy(Version.LUCENE_31, keywordSet));
|
: CharArraySet.copy(Version.LUCENE_31, keywordSet));
|
||||||
}
|
}
|
|
@ -27,9 +27,9 @@ import org.junit.Test;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Testcase for {@link KeywordMarkerTokenFilter}
|
* Testcase for {@link KeywordMarkerFilter}
|
||||||
*/
|
*/
|
||||||
public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
|
public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIncrementToken() throws IOException {
|
public void testIncrementToken() throws IOException {
|
||||||
|
@ -38,16 +38,16 @@ public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
|
||||||
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
|
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
|
||||||
"jumps" };
|
"jumps" };
|
||||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"The quIck browN LuceneFox Jumps")), set)), output);
|
"The quIck browN LuceneFox Jumps")), set)), output);
|
||||||
Set<String> jdkSet = new HashSet<String>();
|
Set<String> jdkSet = new HashSet<String>();
|
||||||
jdkSet.add("LuceneFox");
|
jdkSet.add("LuceneFox");
|
||||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"The quIck browN LuceneFox Jumps")), jdkSet)), output);
|
"The quIck browN LuceneFox Jumps")), jdkSet)), output);
|
||||||
Set<?> set2 = set;
|
Set<?> set2 = set;
|
||||||
assertTokenStreamContents(new LowerCaseFilterMock(
|
assertTokenStreamContents(new LowerCaseFilterMock(
|
||||||
new KeywordMarkerTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||||
"The quIck browN LuceneFox Jumps")), set2)), output);
|
"The quIck browN LuceneFox Jumps")), set2)), output);
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
@ -60,7 +59,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
||||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||||
set.add("yourselves");
|
set.add("yourselves");
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
|
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
|
||||||
TokenStream filter = new PorterStemFilter(new KeywordMarkerTokenFilter(tokenizer, set));
|
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
|
||||||
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
|
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
|
@ -50,7 +50,7 @@ public class EnglishPorterFilterFactory extends BaseTokenFilterFactory implement
|
||||||
|
|
||||||
public TokenFilter create(TokenStream input) {
|
public TokenFilter create(TokenStream input) {
|
||||||
if (protectedWords != null)
|
if (protectedWords != null)
|
||||||
input = new KeywordMarkerTokenFilter(input, protectedWords);
|
input = new KeywordMarkerFilter(input, protectedWords);
|
||||||
return new SnowballFilter(input, new org.tartarus.snowball.ext.EnglishStemmer());
|
return new SnowballFilter(input, new org.tartarus.snowball.ext.EnglishStemmer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ package org.apache.solr.analysis;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.solr.common.ResourceLoader;
|
import org.apache.solr.common.ResourceLoader;
|
||||||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||||
|
@ -26,7 +26,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link KeywordMarkerTokenFilter}
|
* Factory for {@link KeywordMarkerFilter}
|
||||||
*/
|
*/
|
||||||
public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||||
public static final String PROTECTED_TOKENS = "protected";
|
public static final String PROTECTED_TOKENS = "protected";
|
||||||
|
@ -50,6 +50,6 @@ public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implement
|
||||||
}
|
}
|
||||||
|
|
||||||
public TokenStream create(TokenStream input) {
|
public TokenStream create(TokenStream input) {
|
||||||
return protectedWords == null ? input : new KeywordMarkerTokenFilter(input, protectedWords);
|
return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.solr.analysis;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
import org.apache.lucene.analysis.KeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
@ -77,7 +77,7 @@ public class SnowballPorterFilterFactory extends BaseTokenFilterFactory implemen
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protectedWords != null)
|
if (protectedWords != null)
|
||||||
input = new KeywordMarkerTokenFilter(input, protectedWords);
|
input = new KeywordMarkerFilter(input, protectedWords);
|
||||||
return new SnowballFilter(input, program);
|
return new SnowballFilter(input, program);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue