mirror of https://github.com/apache/lucene.git
improve doc for default constructor; make stop word list public but final
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@151013 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8fd3b8498b
commit
a8d98638a0
|
@ -73,14 +73,14 @@ import java.util.Set;
|
||||||
* will not be indexed at all) and an external list of exclusions (word that will
|
* will not be indexed at all) and an external list of exclusions (word that will
|
||||||
* not be stemmed, but indexed).
|
* not be stemmed, but indexed).
|
||||||
*
|
*
|
||||||
* @author João Kramer
|
* @author João Kramer
|
||||||
*/
|
*/
|
||||||
public final class BrazilianAnalyzer extends Analyzer {
|
public final class BrazilianAnalyzer extends Analyzer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of typical Brazilian Portuguese stopwords.
|
* List of typical Brazilian Portuguese stopwords.
|
||||||
*/
|
*/
|
||||||
private String[] BRAZILIAN_STOP_WORDS = {
|
public final static String[] BRAZILIAN_STOP_WORDS = {
|
||||||
"a","ainda","alem","ambas","ambos","antes",
|
"a","ainda","alem","ambas","ambos","antes",
|
||||||
"ao","aonde","aos","apos","aquele","aqueles",
|
"ao","aonde","aos","apos","aquele","aqueles",
|
||||||
"as","assim","com","como","contra","contudo",
|
"as","assim","com","como","contra","contudo",
|
||||||
|
@ -112,7 +112,7 @@ public final class BrazilianAnalyzer extends Analyzer {
|
||||||
private Set excltable = new HashSet();
|
private Set excltable = new HashSet();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer.
|
* Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
|
||||||
*/
|
*/
|
||||||
public BrazilianAnalyzer() {
|
public BrazilianAnalyzer() {
|
||||||
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
|
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
|
||||||
|
|
|
@ -76,7 +76,7 @@ public class CJKAnalyzer extends Analyzer {
|
||||||
* An array containing some common English words that are not usually
|
* An array containing some common English words that are not usually
|
||||||
* useful for searching, plus some double-byte punctuation marks.
|
* useful for searching, plus some double-byte punctuation marks.
|
||||||
*/
|
*/
|
||||||
private static String[] stopWords = {
|
public final static String[] STOP_WORDS = {
|
||||||
"a", "and", "are", "as", "at", "be",
|
"a", "and", "are", "as", "at", "be",
|
||||||
"but", "by", "for", "if", "in",
|
"but", "by", "for", "if", "in",
|
||||||
"into", "is", "it", "no", "not",
|
"into", "is", "it", "no", "not",
|
||||||
|
@ -97,10 +97,10 @@ public class CJKAnalyzer extends Analyzer {
|
||||||
//~ Constructors -----------------------------------------------------------
|
//~ Constructors -----------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer which removes words in STOP_WORDS.
|
* Builds an analyzer which removes words in {@link #STOP_WORDS}.
|
||||||
*/
|
*/
|
||||||
public CJKAnalyzer() {
|
public CJKAnalyzer() {
|
||||||
stopTable = StopFilter.makeStopSet(stopWords);
|
stopTable = StopFilter.makeStopSet(STOP_WORDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -80,7 +80,7 @@ public final class CzechAnalyzer extends Analyzer {
|
||||||
/**
|
/**
|
||||||
* List of typical stopwords.
|
* List of typical stopwords.
|
||||||
*/
|
*/
|
||||||
private static String[] STOP_WORDS = {
|
public final static String[] CZECH_STOP_WORDS = {
|
||||||
"a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
|
"a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
|
||||||
"byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
|
"byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
|
||||||
"jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
|
"jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
|
||||||
|
@ -107,10 +107,10 @@ public final class CzechAnalyzer extends Analyzer {
|
||||||
private Set stoptable;
|
private Set stoptable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer.
|
* Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
|
||||||
*/
|
*/
|
||||||
public CzechAnalyzer() {
|
public CzechAnalyzer() {
|
||||||
stoptable = StopFilter.makeStopSet( STOP_WORDS );
|
stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -84,7 +84,7 @@ public final class FrenchAnalyzer extends Analyzer {
|
||||||
/**
|
/**
|
||||||
* Extended list of typical french stopwords.
|
* Extended list of typical french stopwords.
|
||||||
*/
|
*/
|
||||||
private String[] FRENCH_STOP_WORDS = {
|
public final static String[] FRENCH_STOP_WORDS = {
|
||||||
"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
|
"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
|
||||||
"autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
|
"autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
|
||||||
"c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
|
"c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
|
||||||
|
@ -119,7 +119,7 @@ public final class FrenchAnalyzer extends Analyzer {
|
||||||
private Set excltable = new HashSet();
|
private Set excltable = new HashSet();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer.
|
* Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
|
||||||
*/
|
*/
|
||||||
public FrenchAnalyzer() {
|
public FrenchAnalyzer() {
|
||||||
stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
|
stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
|
||||||
|
|
|
@ -45,7 +45,7 @@ public class DutchAnalyzer extends Analyzer {
|
||||||
/**
|
/**
|
||||||
* List of typical Dutch stopwords.
|
* List of typical Dutch stopwords.
|
||||||
*/
|
*/
|
||||||
private String[] DUTCH_STOP_WORDS =
|
public final static String[] DUTCH_STOP_WORDS =
|
||||||
{
|
{
|
||||||
"de", "en", "van", "ik", "te", "dat", "die", "in", "een",
|
"de", "en", "van", "ik", "te", "dat", "die", "in", "een",
|
||||||
"hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
|
"hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
|
||||||
|
@ -74,7 +74,7 @@ public class DutchAnalyzer extends Analyzer {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an analyzer.
|
* Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}).
|
||||||
*/
|
*/
|
||||||
public DutchAnalyzer() {
|
public DutchAnalyzer() {
|
||||||
stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
|
stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
|
||||||
|
|
Loading…
Reference in New Issue