LUCENE-2285: Code cleanups to remove compiler warnings in eclipse.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@917019 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2010-02-27 19:14:01 +00:00
parent e358c3f2dd
commit efb74380fd
356 changed files with 1280 additions and 1731 deletions

View File

@ -186,7 +186,9 @@ Optimizations
* LUCENE-2195: Speedup CharArraySet if set is empty.
(Simon Willnauer via Robert Muir)
* LUCENE-2285: Code cleanup. (Shai Erera via Uwe Schindler)
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
@ -209,10 +211,11 @@ Test Cases
* LUCENE-2170: Fix thread starvation problems. (Uwe Schindler)
* LUCENE-2248, LUCENE-2251: Refactor tests to not use Version.LUCENE_CURRENT,
but instead use a global static value from LuceneTestCase(J4), that
contains the release version. (Uwe Schindler, Simon Willnauer)
* LUCENE-2248, LUCENE-2251, LUCENE-2285: Refactor tests to not use
Version.LUCENE_CURRENT, but instead use a global static value
from LuceneTestCase(J4), that contains the release version.
(Uwe Schindler, Simon Willnauer, Shai Erera)
================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
Changes in backwards compatibility policy

View File

@ -25,7 +25,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
@ -162,14 +161,16 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
this(matchVersion, WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT));
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from an {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided
* and {@link ArabicStemFilter}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerTokenFilter}
* if a stem exclusion set is provided and {@link ArabicStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
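
The change above sets the pattern repeated in most of the analyzer files in this commit: an import that existed only so a short {@link TokenStreamComponents} could resolve in javadoc is removed, and the javadoc reference is written out fully qualified instead, so Eclipse stops reporting the import as unused. A minimal sketch of the before/after shape (the class name here is only illustrative):

    // Before: an import kept solely for javadoc, flagged by Eclipse as unused:
    //   import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
    // After: the import is dropped and the javadoc link is fully qualified.

    /**
     * Creates
     * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
     * used to tokenize all the text in the provided {@link java.io.Reader}.
     */
    class JavadocLinkExample {
    }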

View File

@ -24,7 +24,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
@ -117,15 +116,18 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet)); }
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link BulgarianStemFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link BulgarianStemFilter}.
*/
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
@ -191,12 +190,16 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
excltable = WordlistLoader.getWordSet( exclusionlist );
setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
* {@link BrazilianStemFilter}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
* , and {@link BrazilianStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.cn;
import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
@ -35,11 +34,13 @@ import org.apache.lucene.analysis.Tokenizer;
public final class ChineseAnalyzer extends ReusableAnalyzerBase {
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link ChineseTokenizer} filtered with {@link ChineseFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link ChineseTokenizer} filtered with
* {@link ChineseFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -113,7 +113,7 @@ public class HyphenationCompoundWordTokenFilter extends
* strings.
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set dictionary) {
HyphenationTree hyphenator, Set<?> dictionary) {
this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
@ -145,7 +145,7 @@ public class HyphenationCompoundWordTokenFilter extends
* Add only the longest matching subword to the stream
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set dictionary, int minWordSize,
HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
@ -201,7 +201,7 @@ public class HyphenationCompoundWordTokenFilter extends
*/
@Deprecated
public HyphenationCompoundWordTokenFilter(TokenStream input,
HyphenationTree hyphenator, Set dictionary) {
HyphenationTree hyphenator, Set<?> dictionary) {
this(Version.LUCENE_30, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
@ -223,7 +223,7 @@ public class HyphenationCompoundWordTokenFilter extends
*/
@Deprecated
public HyphenationCompoundWordTokenFilter(TokenStream input,
HyphenationTree hyphenator, Set dictionary, int minWordSize,
HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
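
These constructor signatures move from the raw type Set to the bounded wildcard Set<?>, which clears Eclipse's raw-type warnings while still accepting a set of any element type. A hedged sketch of the same idea, using a made-up holder class for illustration:

    import java.util.Set;

    class DictionaryHolder {
      // A raw "Set dictionary" would be flagged as a raw type at every use.
      // The wildcard keeps callers free to pass Set<String>, Set<char[]>, etc.
      private Set<?> dictionary;

      void setDictionary(Set<?> dictionary) {
        this.dictionary = dictionary;
      }
    }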

View File

@ -83,7 +83,7 @@ public class CharVector implements Cloneable, Serializable {
@Override
public Object clone() {
CharVector cv = new CharVector((char[]) array.clone(), blockSize);
CharVector cv = new CharVector(array.clone(), blockSize);
cv.n = this.n;
return cv;
}
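
Since Java 5, clone() on an array is declared with a covariant return type, so array.clone() already yields a char[] and the removed (char[]) cast was redundant, which is exactly what Eclipse warns about. A minimal illustration:

    class ArrayCloneDemo {
      static char[] copyOf(char[] original) {
        // No cast needed: char[].clone() returns char[], not Object.
        return original.clone();
      }
    }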

View File

@ -26,11 +26,6 @@ public class Hyphenation {
private int[] hyphenPoints;
/**
* number of hyphenation points in word
*/
private int len;
/**
* rawWord as made of alternating strings and {@link Hyphen Hyphen} instances
*/

View File

@ -44,7 +44,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
/**
* This map stores hyphenation exceptions
*/
protected HashMap<String,ArrayList> stoplist;
protected HashMap<String,ArrayList<Object>> stoplist;
/**
* This map stores the character classes
@ -57,7 +57,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
private transient TernaryTree ivalues;
public HyphenationTree() {
stoplist = new HashMap<String,ArrayList>(23); // usually a small table
stoplist = new HashMap<String,ArrayList<Object>>(23); // usually a small table
classmap = new TernaryTree();
vspace = new ByteVector();
vspace.alloc(1); // this reserves index 0, which we don't use
@ -363,7 +363,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
if (stoplist.containsKey(sw)) {
// assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
// null)
ArrayList hw = stoplist.get(sw);
ArrayList<Object> hw = stoplist.get(sw);
int j = 0;
for (i = 0; i < hw.size(); i++) {
Object o = hw.get(i);
@ -443,7 +443,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
* @param hyphenatedword a vector of alternating strings and
* {@link Hyphen hyphen} objects.
*/
public void addException(String word, ArrayList hyphenatedword) {
public void addException(String word, ArrayList<Object> hyphenatedword) {
stoplist.put(word, hyphenatedword);
}

View File

@ -42,7 +42,7 @@ public interface PatternConsumer {
* his own hyphenation. A hyphenatedword is a vector of alternating String's
* and {@link Hyphen Hyphen} instances
*/
void addException(String word, ArrayList hyphenatedword);
void addException(String word, ArrayList<Object> hyphenatedword);
/**
* Add hyphenation patterns.

View File

@ -51,7 +51,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
StringBuilder token;
ArrayList exception;
ArrayList<Object> exception;
char hyphenChar;
@ -199,8 +199,8 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
return pat.toString();
}
protected ArrayList normalizeException(ArrayList ex) {
ArrayList res = new ArrayList();
protected ArrayList<Object> normalizeException(ArrayList<?> ex) {
ArrayList<Object> res = new ArrayList<Object>();
for (int i = 0; i < ex.size(); i++) {
Object item = ex.get(i);
if (item instanceof String) {
@ -230,7 +230,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
return res;
}
protected String getExceptionWord(ArrayList ex) {
protected String getExceptionWord(ArrayList<?> ex) {
StringBuilder res = new StringBuilder();
for (int i = 0; i < ex.size(); i++) {
Object item = ex.get(i);
@ -291,7 +291,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
currElement = ELEM_PATTERNS;
} else if (local.equals("exceptions")) {
currElement = ELEM_EXCEPTIONS;
exception = new ArrayList();
exception = new ArrayList<Object>();
} else if (local.equals("hyphen")) {
if (token.length() > 0) {
exception.add(token.toString());
@ -308,6 +308,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
* java.lang.String, java.lang.String)
*/
@Override
@SuppressWarnings("unchecked")
public void endElement(String uri, String local, String raw) {
if (token.length() > 0) {
@ -319,7 +320,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
case ELEM_EXCEPTIONS:
exception.add(word);
exception = normalizeException(exception);
consumer.addException(getExceptionWord(exception),
consumer.addException(getExceptionWord(exception),
(ArrayList) exception.clone());
break;
case ELEM_PATTERNS:
@ -344,6 +345,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
/**
* @see org.xml.sax.ContentHandler#characters(char[], int, int)
*/
@SuppressWarnings("unchecked")
@Override
public void characters(char ch[], int start, int length) {
StringBuffer chars = new StringBuffer(length);
@ -428,7 +430,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
System.out.println("class: " + c);
}
public void addException(String w, ArrayList e) {
public void addException(String w, ArrayList<Object> e) {
System.out.println("exception: " + w + " : " + e.toString());
}
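
ArrayList.clone() is declared to return Object, so casting the copy back to a parameterized list is an inherently unchecked operation; the @SuppressWarnings("unchecked") annotations added above confine that one unavoidable warning to the methods that perform the cast, while the rest of the class stays fully typed. A hedged sketch of the same pattern:

    import java.util.ArrayList;

    class ExceptionListCopier {
      @SuppressWarnings("unchecked")
      static ArrayList<Object> copy(ArrayList<Object> exception) {
        // clone() returns Object; the cast back to ArrayList<Object> cannot be
        // verified at runtime, so the warning is suppressed on this method only.
        return (ArrayList<Object>) exception.clone();
      }
    }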

View File

@ -351,10 +351,10 @@ public class TernaryTree implements Cloneable, Serializable {
@Override
public Object clone() {
TernaryTree t = new TernaryTree();
t.lo = (char[]) this.lo.clone();
t.hi = (char[]) this.hi.clone();
t.eq = (char[]) this.eq.clone();
t.sc = (char[]) this.sc.clone();
t.lo = this.lo.clone();
t.hi = this.hi.clone();
t.eq = this.eq.clone();
t.sc = this.sc.clone();
t.kv = (CharVector) this.kv.clone();
t.root = this.root;
t.freenode = this.freenode;

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cz;
*/
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
@ -216,16 +215,20 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
stoptable = Collections.emptySet();
}
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, and {@link CzechStemFilter} (only if version is
* >= LUCENE_31). If a version is >= LUCENE_31 and a stem exclusion set
* is provided via {@link #CzechAnalyzer(Version, Set, Set)} a
* {@link KeywordMarkerTokenFilter} is added before {@link CzechStemFilter}.
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
* a version is >= LUCENE_31 and a stem exclusion set is provided via
* {@link #CzechAnalyzer(Version, Set, Set)} a
* {@link KeywordMarkerTokenFilter} is added before
* {@link CzechStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
@ -222,16 +221,17 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
exclusionSet = WordlistLoader.getWordSet(exclusionlist);
setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
* {@link SnowballFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.el;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
@ -120,15 +119,17 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
{
this(matchVersion, stopwords.keySet());
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with
* {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
*/
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link GreekLowerCaseFilter}, {@link StandardFilter} and
* {@link StopFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -90,13 +89,16 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link PorterStemFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link PorterStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -25,7 +25,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
@ -136,12 +135,13 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link ArabicLetterTokenizer}
* filtered with {@link LowerCaseFilter},
* {@link ArabicNormalizationFilter},
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
* {@link PersianNormalizationFilter} and Persian Stop words
*/
@Override

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.fr;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
@ -225,14 +224,16 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link ElisionFilter},
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ElisionFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* and {@link SnowballFilter}
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -22,7 +22,6 @@ import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
@ -106,15 +105,16 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link IndicTokenizer}
* filtered with {@link LowerCaseFilter},
* {@link IndicNormalizationFilter},
* {@link HindiNormalizationFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* {@link HindiStemFilter}, and Hindi Stop words
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link IndicTokenizer} filtered with
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
* {@link HindiNormalizationFilter}, {@link KeywordMarkerTokenFilter}
* if a stem exclusion set is provided, {@link HindiStemFilter}, and
* Hindi Stop words
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -311,7 +311,7 @@ public final class PatternAnalyzer extends Analyzer {
return new String(output, 0, len);
} finally {
if (input != null) input.close();
input.close();
}
}
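
Here the guard around close() is dropped, presumably because input cannot be null on that path, so Eclipse reports the check as redundant. A small hedged sketch of the shape of the cleanup:

    import java.io.IOException;
    import java.io.Reader;

    class ReaderCloser {
      static void close(Reader input) throws IOException {
        // If earlier code already guarantees input is non-null, the old
        //   if (input != null) input.close();
        // guard is dead and can simply become:
        input.close();
      }
    }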

View File

@ -124,7 +124,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermBuffer = termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
tokStart = offsetAtt.startOffset();

View File

@ -79,7 +79,7 @@ public final class NGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermBuffer = termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
curPos = 0;

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -192,7 +192,7 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
* if there stopwords, it is a StopFilter around wrapped.
*/
TokenStream withStopFilter;
};
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -110,13 +109,16 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -26,7 +26,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -160,16 +159,17 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
this(matchVersion, stopwords.keySet());
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* and {@link SnowballFilter}
*/
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -26,7 +26,7 @@ package org.apache.lucene.analysis.ru;
class RussianStemmer
{
// positions of RV, R1 and R2 respectively
private int RV, R1, R2;
private int RV, /*R1,*/ R2;
// letters (currently unused letters are commented out)
private final static char A = '\u0430';
@ -263,11 +263,7 @@ class RussianStemmer
if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
return false;
// if adjective ending was found, try for participle ending.
// variable r is unused, we are just interested in the side effect of
// findAndRemoveEnding():
boolean r =
findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
||
if (!findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
findAndRemoveEnding(stemmingZone, participleEndings2);
return true;
}
@ -391,7 +387,7 @@ class RussianStemmer
private void markPositions(String word)
{
RV = 0;
R1 = 0;
// R1 = 0;
R2 = 0;
int i = 0;
// find RV
@ -409,7 +405,7 @@ class RussianStemmer
}
if (word.length() - 1 < ++i)
return; // R1 zone is empty
R1 = i;
// R1 = i;
// find R2
while (word.length() > i && !isVowel(word.charAt(i)))
{
@ -532,13 +528,9 @@ class RussianStemmer
if (!perfectiveGerund(stemmingZone))
{
reflexive(stemmingZone);
// variable r is unused, we are just interested in the flow that gets
// created by logical expression: apply adjectival(); if that fails,
// apply verb() etc
boolean r =
adjectival(stemmingZone)
|| verb(stemmingZone)
|| noun(stemmingZone);
if (!adjectival(stemmingZone))
if (!verb(stemmingZone))
noun(stemmingZone);
}
// Step 2
removeI(stemmingZone);
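
Both hunks in this file replace the same idiom, as the removed comments themselves note: a boolean local assigned only for the side effects of a short-circuiting || chain (an unused variable in Eclipse's eyes) becomes an explicit if cascade with identical evaluation order. A sketch with placeholder predicates standing in for adjectival()/verb()/noun():

    class SideEffectChain {
      static boolean a() { return false; }
      static boolean b() { return false; }
      static boolean c() { return true; }

      static void before() {
        boolean r = a() || b() || c(); // 'r' is never read: unused-variable warning
      }

      static void after() {
        // Same short-circuit behaviour, no unused variable.
        if (!a())
          if (!b())
            c();
      }
    }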

View File

@ -391,8 +391,8 @@ public final class ShingleFilter extends TokenFilter {
}
/**
* {@see #advance()}
* @return the current value.
* @see #advance()
*/
public int getValue() {
return value;

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.sinks;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter;
@ -42,7 +41,7 @@ public class DateRecognizerSinkFilter extends SinkFilter {
* Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
*/
public DateRecognizerSinkFilter() {
this(SimpleDateFormat.getDateInstance());
this(DateFormat.getDateInstance());
}
public DateRecognizerSinkFilter(DateFormat dateFormat) {
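
getDateInstance() is a static factory declared on DateFormat; invoking it through the SimpleDateFormat subclass compiles, but Eclipse warns that a static method should be accessed via its declaring type, hence the one-line change above. Sketch:

    import java.text.DateFormat;

    class DateFormatDemo {
      static DateFormat defaultFormat() {
        // Was: SimpleDateFormat.getDateInstance() -- static access via subtype.
        // Preferred: call the factory on the class that declares it.
        return DateFormat.getDateInstance();
      }
    }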

View File

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -106,13 +105,16 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
@ -45,12 +44,14 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
}
/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link ThaiWordFilter}, and {@link StopFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ThaiWordFilter}, and
* {@link StopFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,

View File

@ -28,7 +28,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@ -109,11 +108,14 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
}
/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
*/

View File

@ -24,7 +24,6 @@ import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
/**
* Test the Arabic Analyzer
@ -35,14 +34,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ArabicAnalyzer(Version.LUCENE_CURRENT);
new ArabicAnalyzer(TEST_VERSION_CURRENT);
}
/**
* Some simple tests showing some features of the analyzer, how some regular forms will conflate
*/
public void testBasicFeatures() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
@ -63,7 +62,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Simple tests to show things are getting reset correctly, etc.
*/
public void testReusableTokenStream() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
}
@ -72,7 +71,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Non-arabic text gets treated in a similar way as SimpleAnalyzer.
*/
public void testEnglishInput() throws Exception {
assertAnalyzesTo(new ArabicAnalyzer(Version.LUCENE_CURRENT), "English text.", new String[] {
assertAnalyzesTo(new ArabicAnalyzer(TEST_VERSION_CURRENT), "English text.", new String[] {
"english", "text" });
}
@ -82,7 +81,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
public void testCustomStopwords() throws Exception {
Set<String> set = new HashSet<String>();
Collections.addAll(set, "the", "and", "a");
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, set);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
@ -90,12 +89,12 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
public void testWithStemExclusionSet() throws IOException {
Set<String> set = new HashSet<String>();
set.add("ساهدهات");
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
}
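
As described in the CHANGES entry for LUCENE-2248/2251/2285 above, the tests stop hard-coding Version.LUCENE_CURRENT and instead reference a single static Version constant inherited from LuceneTestCase. A hedged sketch of what such a base-class constant could look like; the exact definition and value in LuceneTestCase may differ:

    public abstract class BaseTestCaseSketch {
      // Assumed value for illustration only; the real constant tracks the
      // release version under development.
      protected static final org.apache.lucene.util.Version TEST_VERSION_CURRENT =
          org.apache.lucene.util.Version.LUCENE_31;

      // Tests then write e.g.:
      //   ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
    }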

View File

@ -21,11 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Test the Arabic Normalization Filter
*
*/
public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
@ -86,7 +84,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
}
private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
ArabicNormalizationFilter filter = new ArabicNormalizationFilter(tokenStream);
assertTokenStreamContents(filter, new String[]{expected});
}

View File

@ -23,7 +23,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.util.Version;
/**
* Test the Arabic Normalization Filter
@ -116,16 +115,16 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
}
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("ساهدهات");
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader("ساهدهات"));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerTokenFilter(tokenStream, set));
assertTokenStreamContents(filter, new String[]{"ساهدهات"});
}
private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
assertTokenStreamContents(filter, new String[]{expected});
}

View File

@ -34,23 +34,23 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new BulgarianAnalyzer(Version.LUCENE_CURRENT);
new BulgarianAnalyzer(TEST_VERSION_CURRENT);
}
public void testStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"});
}
public void testCustomStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, Collections
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
.emptySet());
assertAnalyzesTo(a, "Как се казваш?",
new String[] {"как", "се", "казваш"});
}
public void testReusableTokenStream() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "документи", new String[] {"документ"});
assertAnalyzesToReuse(a, "документ", new String[] {"документ"});
}
@ -59,7 +59,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
* Test some examples from the paper
*/
public void testBasicExamples() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "енергийни кризи", new String[] {"енергийн", "криз"});
assertAnalyzesTo(a, "Атомната енергия", new String[] {"атомн", "енерг"});
@ -72,7 +72,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
public void testWithStemExclusionSet() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
}
}

View File

@ -35,7 +35,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* common (and some rare) plural pattern is listed.
*/
public void testMasculineNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
// -и pattern
assertAnalyzesTo(a, "град", new String[] {"град"});
@ -101,7 +101,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test showing how feminine noun forms conflate
*/
public void testFeminineNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "вест", new String[] {"вест"});
assertAnalyzesTo(a, "вестта", new String[] {"вест"});
@ -114,7 +114,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* plural pattern is listed
*/
public void testNeuterNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
// -а pattern
assertAnalyzesTo(a, "дърво", new String[] {"дърв"});
@ -142,7 +142,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test showing how adjectival forms conflate
*/
public void testAdjectives() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "красив", new String[] {"красив"});
assertAnalyzesTo(a, "красивия", new String[] {"красив"});
assertAnalyzesTo(a, "красивият", new String[] {"красив"});
@ -158,7 +158,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test some exceptional rules, implemented as rewrites.
*/
public void testExceptions() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
// ци -> к
assertAnalyzesTo(a, "собственик", new String[] {"собственик"});
@ -215,7 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("строевете строеве"));
BulgarianStemFilter filter = new BulgarianStemFilter(

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the Brazilian Stem Filter, which only modifies the term text.
@ -128,7 +127,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
checkReuse(a, "boa", "boa");
checkReuse(a, "boainain", "boainain");
checkReuse(a, "boas", "boas");
@ -136,35 +135,35 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}
public void testStemExclusionTable() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}
public void testStemExclusionTableBWCompat() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
BrazilianStemFilter filter = new BrazilianStemFilter(
new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader("Brasília Brasilia")), set);
new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
BrazilianStemFilter filter = new BrazilianStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Brasília Brasilia")), set));
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
public void testWithKeywordAttributeAndExclusionTable() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set1.add("Brasilia");
BrazilianStemFilter filter = new BrazilianStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Brasília Brasilia")), set), set1);
assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
}
@ -174,14 +173,14 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
checkReuse(a, "quintessência", "quintessente");
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência");
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new BrazilianAnalyzer(Version.LUCENE_CURRENT), input, expected);
checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
private void checkReuse(Analyzer a, String input, String expected) throws Exception {

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;
public class TestCJKTokenizer extends BaseTokenStreamTestCase {
@ -42,7 +41,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}
public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String terms[] = new String[out_tokens.length];
int startOffsets[] = new int[out_tokens.length];
int endOffsets[] = new int[out_tokens.length];
@ -57,7 +56,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}
public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String terms[] = new String[out_tokens.length];
int startOffsets[] = new int[out_tokens.length];
int endOffsets[] = new int[out_tokens.length];
@ -213,13 +212,13 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}
public void testTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
}
public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
TestToken[] out_tokens = {

View File

@ -28,17 +28,11 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
static final File dataDir = new File(System.getProperty("dataDir", "./bin"));
static final File testFile = new File(dataDir, "org/apache/lucene/analysis/compound/da_UTF8.xml");
@Override
protected void setUp() throws Exception {
super.setUp();
}
public void testHyphenationCompoundWordsDA() throws Exception {
String[] dict = { "læse", "hest" };
@ -47,8 +41,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
.getHyphenationTree(reader);
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"min veninde som er lidt af en læsehest")), hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
@ -67,8 +61,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
.getHyphenationTree(reader);
// the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"basketballkurv")), hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
@ -84,8 +78,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
"Sko", "Vind", "Rute", "Torkare", "Blad" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT,
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
dict);
@ -113,8 +107,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
@ -129,9 +123,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
"Aufgabe", "Überwachung" };
Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Rindfleischüberwachungsgesetz"));
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
wsTokenizer, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,

View File

@ -48,7 +48,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}
public void testStopWord() throws Exception {
assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem",
assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem",
new String[] { "mluvim", "voln" });
}
@ -63,7 +63,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
}
@ -112,9 +112,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}
public void testWithStemExclusionSet() throws IOException{
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
}

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the Czech Stemmer.
@ -38,7 +37,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how masculine noun forms conflate
*/
public void testMasculineNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
/* animate ending with a hard consonant */
assertAnalyzesTo(cz, "pán", new String[] { "pán" });
@ -106,7 +105,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how feminine noun forms conflate
*/
public void testFeminineNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
/* ending with hard consonant */
assertAnalyzesTo(cz, "kost", new String[] { "kost" });
@ -150,7 +149,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how neuter noun forms conflate
*/
public void testNeuterNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
/* ending with o */
assertAnalyzesTo(cz, "město", new String[] { "měst" });
@ -193,7 +192,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how adjectival forms conflate
*/
public void testAdjectives() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
/* ending with ý/á/é */
assertAnalyzesTo(cz, "mladý", new String[] { "mlad" });
@ -221,7 +220,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test some possessive suffixes
*/
public void testPossessive() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(cz, "Karlův", new String[] { "karl" });
assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" });
}
@ -230,7 +229,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test some exceptional rules, implemented as rewrites.
*/
public void testExceptions() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
/* rewrite of št -> sk */
assertAnalyzesTo(cz, "český", new String[] { "česk" });
@ -270,16 +269,16 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test that very short words are not stemmed.
*/
public void testDontStem() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(cz, "e", new String[] { "e" });
assertAnalyzesTo(cz, "zi", new String[] { "zi" });
}
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hole desek")), set));
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}
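
The keyword-attribute tests above all use the same idiom: the tokenizer is wrapped in a KeywordMarkerTokenFilter with a CharArraySet, which marks matching tokens as keywords so the stem filter further down the chain leaves them untouched. A condensed sketch of the Czech case, using the identifiers and imports already visible in the hunks above (not part of this commit):

    // "hole" is in the set, so it is marked as a keyword and bypasses stemming;
    // "desek" is not, so the Czech stemmer reduces it to "desk".
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("hole");
    CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
    assertTokenStreamContents(filter, new String[] { "hole", "desk" });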

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new DanishAnalyzer(Version.LUCENE_CURRENT);
new DanishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "undersøg", "undersøg");
checkOneTermReuse(a, "undersøgelse", "undersøg");
@ -46,7 +45,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("undersøgelse");
Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
DanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");

View File

@ -29,38 +29,38 @@ import org.apache.lucene.util.Version;
public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception {
Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "Tisch", "tisch");
checkOneTermReuse(a, "Tische", "tisch");
checkOneTermReuse(a, "Tischen", "tisch");
}
public void testExclusionTableBWCompat() throws IOException {
GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT,
new StringReader("Fischen Trinken")));
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
filter.setExclusionSet(set);
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
GermanStemFilter filter = new GermanStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Fischen Trinken")), set));
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
public void testWithKeywordAttributeAndExclusionTable() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set1.add("trinken");
set1.add("fischen");
GermanStemFilter filter = new GermanStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Fischen Trinken")), set));
filter.setExclusionSet(set1);
assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
@ -71,7 +71,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "tischen", "tisch");
a.setStemExclusionTable(new String[] { "tischen" });
checkOneTermReuse(a, "tischen", "tischen");
@ -81,7 +81,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
* these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
*/
public void testGermanSpecials() throws Exception {
GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
// a/o/u + e is equivalent to the umlaut form
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflach");

View File

@ -28,7 +28,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
* Test the German stemmer. The stemming algorithm is known to work less
@ -40,7 +39,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
public void testStemming() throws Exception {
Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
// read test cases from external file:
File dataDir = new File(System.getProperty("dataDir", "./bin"));
File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");

View File

@ -32,7 +32,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
* @throws Exception in case an error occurs
*/
public void testAnalyzer() throws Exception {
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
@ -48,7 +48,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new EnglishAnalyzer(Version.LUCENE_CURRENT);
new EnglishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "books", "book");
checkOneTermReuse(a, "book", "book");
@ -46,7 +45,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("books");
Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new SpanishAnalyzer(Version.LUCENE_CURRENT);
new SpanishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chican");
@ -46,7 +45,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("chicano");
Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fa;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;
/**
* Test the Persian Analyzer
@ -31,7 +30,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new PersianAnalyzer(Version.LUCENE_CURRENT);
new PersianAnalyzer(TEST_VERSION_CURRENT);
}
/**
@ -42,7 +41,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbs() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
// active present indicative
assertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
// active preterite indicative
@ -118,7 +117,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbsDefective() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
// active present indicative
assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
// active preterite indicative
@ -189,7 +188,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* nouns, removing the plural -ha.
*/
public void testBehaviorNouns() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
}
@ -199,7 +198,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* (lowercased, etc)
*/
public void testBehaviorNonPersian() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
}
@ -207,7 +206,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Basic test ensuring that reusableTokenStream works correctly.
*/
public void testReusableTokenStream() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
assertAnalyzesToReuse(a, "برگ‌ها", new String[] { "برگ" });
}
@ -216,7 +215,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, new String[] { "the", "and", "a" });
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the Persian Normalization Filter
@ -55,7 +54,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
}
private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT,
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
PersianNormalizationFilter filter = new PersianNormalizationFilter(
tokenStream);

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new FinnishAnalyzer(Version.LUCENE_CURRENT);
new FinnishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäj");
@ -46,7 +45,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("edeltäjistään");
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
*
@ -38,19 +37,19 @@ public class TestElision extends BaseTokenStreamTestCase {
public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
Set articles = new HashSet();
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
Set<String> articles = new HashSet<String>();
articles.add("l");
articles.add("M");
TokenFilter filter = new ElisionFilter(Version.LUCENE_CURRENT, tokenizer, articles);
List tas = filtre(filter);
TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
List<String> tas = filter(filter);
assertEquals("embrouille", tas.get(4));
assertEquals("O'brian", tas.get(6));
assertEquals("enfin", tas.get(7));
}
private List filtre(TokenFilter filter) throws IOException {
List tas = new ArrayList();
private List<String> filter(TokenFilter filter) throws IOException {
List<String> tas = new ArrayList<String>();
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
while (filter.incrementToken()) {
tas.add(termAtt.term());
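
Besides swapping in TEST_VERSION_CURRENT, the TestElision hunks also generify the raw List and Set usages and rename the helper from filtre to filter. After the change the helper reads roughly as below; the closing lines are assumed, since the hunk is truncated:

    private List<String> filter(TokenFilter filter) throws IOException {
      List<String> tas = new ArrayList<String>();
      TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
      while (filter.incrementToken()) {
        tas.add(termAtt.term());
      }
      return tas; // tail of the method assumed, not shown in the hunk above
    }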

View File

@ -32,7 +32,7 @@ import org.apache.lucene.util.Version;
public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
public void testAnalyzer() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(fa, "", new String[] {
});
@ -204,7 +204,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
// stopwords
assertAnalyzesToReuse(
fa,
@ -229,27 +229,27 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
fa.setStemExclusionTable(new String[] { "habitable" });
assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
}
public void testExclusionTableViaCtor() throws Exception {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("habitable");
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT,
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
"chist" });
fa = new FrenchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
}
public void testElision() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
}

View File

@ -5,7 +5,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,11 +30,11 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HindiAnalyzer(Version.LUCENE_CURRENT);
new HindiAnalyzer(TEST_VERSION_CURRENT);
}
public void testBasics() throws Exception {
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT);
// two ways to write 'hindi' itself.
checkOneTermReuse(a, "हिन्दी", "हिंद");
checkOneTermReuse(a, "हिंदी", "हिंद");
@ -44,7 +43,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
public void testExclusionSet() throws Exception {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("हिंदी");
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test HindiNormalizer
@ -60,7 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
}
private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test HindiStemmer
@ -82,7 +81,7 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase {
}
private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new HindiStemFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HungarianAnalyzer(Version.LUCENE_CURRENT);
new HungarianAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "babakocsi", "babakocs");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
@ -46,7 +45,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("babakocsi");
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test IndicNormalizer
@ -45,7 +44,7 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
}
private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new IndicNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Test IndicTokenizer
@ -30,7 +29,7 @@ import org.apache.lucene.util.Version;
public class TestIndicTokenizer extends BaseTokenStreamTestCase {
/** Test tokenizing Indic vowels, signs, and punctuation */
public void testBasics() throws IOException {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT,
TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("मुझे हिंदी का और अभ्यास करना होगा ।"));
assertTokenStreamContents(ts,
new String[] { "मुझे", "हिंदी", "का", "और", "अभ्यास", "करना", "होगा" });
@ -38,7 +37,7 @@ public class TestIndicTokenizer extends BaseTokenStreamTestCase {
/** Test that words with format chars such as ZWJ are kept */
public void testFormat() throws Exception {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT,
TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("शार्‍मा शार्‍मा"));
assertTokenStreamContents(ts, new String[] { "शार्‍मा", "शार्‍मा" });
}

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ItalianAnalyzer(Version.LUCENE_CURRENT);
new ItalianAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "abbandonata", "abbandon");
checkOneTermReuse(a, "abbandonati", "abbandon");
@ -46,7 +45,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("abbandonata");
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");

View File

@ -24,7 +24,6 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Verifies the behavior of PatternAnalyzer.
@ -37,13 +36,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox", "the", "abcd", "dc" });
// split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox", "abcd", "dc" });
@ -55,13 +54,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
// Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
@ -73,12 +72,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), false, null);
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
"Are", "some", "Comma", "separated", "words" });
// split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), true,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true,
StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
"some", "comma", "separated", "words" });
@ -103,7 +102,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
document.append(largeWord2);
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, document.toString(), new String[] { new String(largeWord),
new String(largeWord2) });

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@ -31,7 +30,7 @@ public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase
PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world")),
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")),
new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@ -42,7 +41,7 @@ public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase {
// prefix and suffix using 2x prefix
ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world")));
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")));
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,

View File

@ -10,7 +10,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -38,7 +37,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
dictionary.put("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
TokenStream stream = new PorterStemFilter(
new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
assertTokenStreamContents(stream, new String[] { "books" });
}
}
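
The stemmer-override hunk makes the intent of that filter visible: a dictionary entry takes precedence over the downstream stemmer, so the test expects "books" rather than the "book" that PorterStemFilter alone would produce. A sketch of the same setup; the dictionary's declaration falls outside the hunk, so its Map type here is an assumption:

    // Assumed declarations (not visible in the hunk above):
    // import java.util.HashMap; import java.util.Map;
    Map<String, String> dictionary = new HashMap<String, String>();
    dictionary.put("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(
        new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
    // The override wins: "booked" -> "books", not Porter's "book".
    assertTokenStreamContents(stream, new String[] { "books" });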

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@ -31,9 +30,9 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input;
@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
}
public void testInvalidInput() throws Exception {
@ -92,13 +91,13 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
}
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
tokenizer.reset(new StringReader("abcde"));

View File

@ -29,7 +29,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input;
@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new StringReader("abcde");
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@ -31,9 +30,9 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input;
@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
}
public void testInvalidInput() throws Exception {
@ -81,13 +80,13 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
}
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5});
tokenizer.reset(new StringReader("abcde"));

View File

@ -29,7 +29,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input;
@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new StringReader("abcde");
}

View File

@ -127,14 +127,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}
public void testSnowballCorrectness() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "opheffen", "opheff");
checkOneTermReuse(a, "opheffende", "opheff");
checkOneTermReuse(a, "opheffing", "opheff");
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
checkOneTermReuse(a, "lichamelijk", "licham");
checkOneTermReuse(a, "lichamelijke", "licham");
@ -146,7 +146,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemExclusionTable(new String[] { "lichamelijk" });
checkOneTermReuse(a, "lichamelijk", "lichamelijk");
@ -157,10 +157,10 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
public void testExclusionTableViaCtor() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
set.add("lichamelijk");
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
}
@ -170,7 +170,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testStemDictionaryReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemDictionary(customDictFile);
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
@ -196,7 +196,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
}

View File

@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new NorwegianAnalyzer(Version.LUCENE_CURRENT);
new NorwegianAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
@ -46,7 +45,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("havnedistriktene");
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");

View File

@ -22,21 +22,15 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader;
/**
*
*
**/
public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testPayloads() throws Exception {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)),
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@ -57,7 +51,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)),
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@ -75,7 +69,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new FloatEncoder());
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@ -93,7 +87,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);


@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@@ -38,7 +37,7 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))), 3, "D");
NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
boolean seenDogs = false;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);


@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@@ -36,7 +35,7 @@ public class TokenOffsetPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
int count = 0;
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);


@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@@ -38,7 +37,7 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))));
TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
int count = 0;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
@@ -48,7 +47,6 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
assertTrue("type is null and it shouldn't be", type != null);
assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
count++;
}


@@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new PortugueseAnalyzer(Version.LUCENE_CURRENT);
new PortugueseAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "quilométricas", "quilométr");
checkOneTermReuse(a, "quilométricos", "quilométr");
@@ -46,7 +45,7 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("quilométricas");
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr");


@@ -37,7 +37,6 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
@@ -51,7 +50,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
protected void setUp() throws Exception {
super.setUp();
dir = new RAMDirectory();
appAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
int numDocs = 200;
for (int i = 0; i < numDocs; i++) {
@@ -64,7 +63,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
writer.close();
reader = IndexReader.open(dir, true);
protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer);
}
@Override
@@ -75,7 +74,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
//Helper method to query
private int search(Analyzer a, String queryString) throws IOException, ParseException {
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a);
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
Query q = qp.parse(queryString);
return new IndexSearcher(reader).search(q, null, 1000).totalHits;
}
@@ -157,14 +156,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
else
return new LetterTokenizer(Version.LUCENE_CURRENT, reader);
return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
}
}
public void testWrappingNonReusableAnalyzer() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
a.addStopWords(reader, 10);
int numHits = search(a, "repetitiveField:boring");
assertTrue(numHits == 0);
@@ -173,7 +172,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testTokenStream() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
TermAttribute termAtt = ts.getAttribute(TermAttribute.class);


@@ -27,9 +27,9 @@ import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream);
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
TermAttribute text = filter.getAttribute(TermAttribute.class);
assertTrue(filter.incrementToken());
assertEquals("oD", text.term());
@@ -45,9 +45,9 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
}
public void testFilterWithMark() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream, '\u0001');
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
TermAttribute text = filter
.getAttribute(TermAttribute.class);
assertTrue(filter.incrementToken());
@@ -64,14 +64,14 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
}
public void testReverseString() throws Exception {
assertEquals( "A", ReverseStringFilter.reverse( "A" ) );
assertEquals( "BA", ReverseStringFilter.reverse( "AB" ) );
assertEquals( "CBA", ReverseStringFilter.reverse( "ABC" ) );
assertEquals( "A", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "A" ) );
assertEquals( "BA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "AB" ) );
assertEquals( "CBA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "ABC" ) );
}
public void testReverseChar() throws Exception {
char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' };
ReverseStringFilter.reverse( buffer, 2, 3 );
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 2, 3 );
assertEquals( "ABEDCF", new String( buffer ) );
}
@@ -84,37 +84,37 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testReverseSupplementary() throws Exception {
// supplementary at end
assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅"));
assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅"));
// supplementary at end - 1
assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅a"));
assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅a"));
// supplementary at start
assertEquals("fedcba𩬅", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "𩬅abcdef"));
assertEquals("fedcba𩬅", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "𩬅abcdef"));
// supplementary at start + 1
assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "z𩬅abcdef"));
assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "z𩬅abcdef"));
// supplementary medial
assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "abcd𩬅efg"));
assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "abcd𩬅efg"));
}
public void testReverseSupplementaryChar() throws Exception {
// supplementary at end
char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer));
// supplementary at end - 1
buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer));
// supplementary at start
buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer));
// supplementary at start + 1
buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer));
// supplementary medial
buffer = "abc瀛愯𩬅def".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abcfed𩬅愯瀛", new String(buffer));
}
}


@@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new RomanianAnalyzer(Version.LUCENE_CURRENT);
new RomanianAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "absenţa", "absenţ");
checkOneTermReuse(a, "absenţi", "absenţ");
@@ -46,7 +45,7 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("absenţa");
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "absenţa", "absenţa");
checkOneTermReuse(a, "absenţi", "absenţ");


@@ -44,8 +44,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
private File dataDir;
@Override
protected void setUp() throws Exception
{
protected void setUp() throws Exception {
super.setUp();
dataDir = new File(System.getProperty("dataDir", "./bin"));
}
@@ -71,7 +70,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
TokenStream in = ra.tokenStream("all", inWords);
RussianLetterTokenizer sample =
new RussianLetterTokenizer(Version.LUCENE_CURRENT,
new RussianLetterTokenizer(TEST_VERSION_CURRENT,
sampleUnicode);
TermAttribute text = in.getAttribute(TermAttribute.class);
@@ -98,7 +97,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testDigitsInRussianCharset()
{
Reader reader = new StringReader("text 1000");
RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
TokenStream stream = ra.tokenStream("", reader);
TermAttribute termText = stream.getAttribute(TermAttribute.class);
@@ -126,7 +125,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
@@ -135,9 +134,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testWithStemExclusionSet() throws Exception {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("представление");
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });


@@ -30,8 +30,8 @@ import java.util.ArrayList;
@Deprecated
public class TestRussianStem extends LuceneTestCase
{
private ArrayList words = new ArrayList();
private ArrayList stems = new ArrayList();
private ArrayList<String> words = new ArrayList<String>();
private ArrayList<String> stems = new ArrayList<String>();
public TestRussianStem(String name)
{
@@ -42,8 +42,7 @@ public class TestRussianStem extends LuceneTestCase
* @see TestCase#setUp()
*/
@Override
protected void setUp() throws Exception
{
protected void setUp() throws Exception {
super.setUp();
//System.out.println(new java.util.Date());
String str;
@@ -75,15 +74,6 @@ public class TestRussianStem extends LuceneTestCase
inStems.close();
}
/**
* @see TestCase#tearDown()
*/
@Override
protected void tearDown() throws Exception
{
super.tearDown();
}
public void testStem()
{
for (int i = 0; i < words.size(); i++)
@@ -91,7 +81,7 @@ public class TestRussianStem extends LuceneTestCase
//if ( (i % 100) == 0 ) System.err.println(i);
String realStem =
RussianStemmer.stemWord(
(String) words.get(i));
words.get(i));
assertEquals("unicode", stems.get(i), realStem);
}
}


@@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* A test class for ShingleAnalyzerWrapper as regards queries and scoring.
@@ -86,7 +85,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
searcher = setUpSearcher(analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
Query q = qp.parse(qs);
@@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"test sentence");
int[] ranks = new int[] { 1, 2, 0 };
compareRanks(hits, ranks);
@@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"\"this sentence\"");
int[] ranks = new int[] { 0 };
compareRanks(hits, ranks);
@@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"\"test sentence\"");
int[] ranks = new int[] { 1 };
compareRanks(hits, ranks);
@@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"+test +sentence");
int[] ranks = new int[] { 1, 2 };
compareRanks(hits, ranks);
@@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
* This shows how to construct a phrase query containing shingles.
*/
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
searcher = setUpSearcher(analyzer);
PhraseQuery q = new PhraseQuery();
@@ -178,7 +177,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
* in the right order and adjacent to each other.
*/
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
searcher = setUpSearcher(analyzer);
BooleanQuery q = new BooleanQuery();
@@ -200,7 +199,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
assertAnalyzesToReuse(a, "please divide into shingles",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
@@ -222,9 +221,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
else
return new LetterTokenizer(Version.LUCENE_CURRENT, reader);
return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
}
}
@@ -249,7 +248,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 4);
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", "please divide this sentence",
"divide", "divide this sentence", "divide this sentence into",
@@ -273,7 +272,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 3);
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this",
"divide", "divide this sentence",
@@ -297,7 +296,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNoTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator("");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -319,7 +318,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNullTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator(null);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -340,7 +339,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
public void testAltTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator("<SEP>");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "please<SEP>divide",


@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Version;
public class ShingleFilterTest extends BaseTokenStreamTestCase {
@@ -836,7 +835,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
public void testReset() throws Exception {
Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("please divide this sentence"));
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
TokenStream filter = new ShingleFilter(wsTokenizer, 2);
assertTokenStreamContents(filter,
new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},


@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Version;
public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
@@ -41,11 +40,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
public void testIterator() throws IOException {
WhitespaceTokenizer wst = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("one two three four five"));
WhitespaceTokenizer wst = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("one two three four five"));
ShingleMatrixFilter smf = new ShingleMatrixFilter(wst, 2, 2, '_', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
int i;
for(i=0; smf.incrementToken(); i++);
for(i=0; smf.incrementToken(); i++) {}
assertEquals(4, i);
// call next once more. this should return false again rather than throwing an exception (LUCENE-1939)
@@ -65,11 +64,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
assertFalse(ts.incrementToken());
TokenListStream tls;
LinkedList tokens;
LinkedList<Token> tokens;
// test a plain old token stream with synonyms translated to rows.
tokens = new LinkedList();
tokens = new LinkedList<Token>();
tokens.add(createToken("please", 0, 6));
tokens.add(createToken("divide", 7, 13));
tokens.add(createToken("this", 14, 18));
@@ -101,11 +100,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
TokenStream ts;
TokenStream tls;
LinkedList tokens;
LinkedList<Token> tokens;
// test a plain old token stream with synonyms tranlated to rows.
tokens = new LinkedList();
tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 0, 4));
tokens.add(tokenFactory("greetings", 0, 0, 4));
tokens.add(tokenFactory("world", 1, 5, 10));
@@ -145,7 +144,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
tokens = new LinkedList();
tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
@@ -286,7 +285,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
//
tokens = new LinkedList();
tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
@@ -413,11 +412,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
}
private Token tokenFactory(String text, int startOffset, int endOffset) {
return tokenFactory(text, 1, 1f, startOffset, endOffset);
}
private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset);
token.setTermBuffer(text);
@@ -430,10 +424,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
return tokenFactory(text, posIncr, 1f, 0, 0);
}
private Token tokenFactory(String text, int posIncr, float weight) {
return tokenFactory(text, posIncr, weight, 0, 0);
}
private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset);
token.setTermBuffer(text);
@@ -460,17 +450,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
assertEquals(text, termAtt.term());
}
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost) throws IOException {
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
assertTrue(ts.incrementToken());
assertEquals(text, termAtt.term());
assertEquals(positionIncrement, posIncrAtt.getPositionIncrement());
assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0);
}
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException {
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@@ -505,7 +484,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
public static class TokenListStream extends TokenStream {
private Collection tokens;
private Collection<Token> tokens;
TermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
PayloadAttribute payloadAtt;
@@ -513,7 +492,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
TypeAttribute typeAtt;
FlagsAttribute flagsAtt;
public TokenListStream(Collection tokens) {
public TokenListStream(Collection<Token> tokens) {
this.tokens = tokens;
termAtt = addAttribute(TermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -523,7 +502,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
flagsAtt = addAttribute(FlagsAttribute.class);
}
private Iterator iterator;
private Iterator<Token> iterator;
@Override
public boolean incrementToken() throws IOException {
@@ -533,7 +512,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
if (!iterator.hasNext()) {
return false;
}
Token prototype = (Token) iterator.next();
Token prototype = iterator.next();
clearAttributes();
termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());


@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.util.Version;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -37,7 +36,7 @@ public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
int count = 0;


@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.util.Version;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -35,7 +34,7 @@ public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
int count = 0;


@@ -27,11 +27,9 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
public TokenTypeSinkTokenizerTest(String s) {
super(s);
}
@@ -40,7 +38,7 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))));
TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
boolean seenDogs = false;


@@ -33,13 +33,13 @@ import org.apache.lucene.util.Version;
public class TestSnowball extends BaseTokenStreamTestCase {
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
}
public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English",
StandardAnalyzer.STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped",
new String[]{"quick", "brown", "fox", "jump"});
@@ -50,7 +50,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
* we lowercase I correct for non-Turkish languages in either case.
*/
public void testEnglishLowerCase() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" });
assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" });
@@ -63,7 +63,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
* Test turkish lowercasing
*/
public void testTurkish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "Turkish");
Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
@@ -84,7 +84,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesToReuse(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
assertAnalyzesToReuse(a, "she abhorred him",


@@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new SwedishAnalyzer(Version.LUCENE_CURRENT);
new SwedishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
@@ -46,7 +45,7 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("jaktkarlarne");
Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl");


@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.th;
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@@ -32,7 +31,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* testcase for offsets
*/
public void testOffsets() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์",
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
new int[] { 0, 2, 7, 9, 12 },
new int[] { 2, 7, 9, 12, 17});
@@ -50,7 +49,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
*/
public void testBuggyTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
@@ -64,7 +63,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
*/
public void testAnalyzer() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "", new String[] {});
@@ -89,7 +88,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Test that position increments are adjusted correctly for stopwords.
*/
public void testPositionIncrements() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
@@ -106,7 +105,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(analyzer, "", new String[] {});
assertAnalyzesToReuse(


@@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new TurkishAnalyzer(Version.LUCENE_CURRENT);
new TurkishAnalyzer(TEST_VERSION_CURRENT);
}
/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "ağacı", "ağaç");
checkOneTermReuse(a, "ağaç", "ağaç");
@@ -46,7 +45,7 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("ağacı");
Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "ağacı", "ağacı");
checkOneTermReuse(a, "ağaç", "ağaç");


@@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the Turkish lowercase filter.
@@ -33,7 +32,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* Test composed forms
*/
public void testTurkishLowerCaseFilter() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@@ -44,7 +43,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* Test decomposed forms
*/
public void testDecomposed() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@@ -57,7 +56,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* to U+0130 + U+0316, and is lowercased the same way.
*/
public void testDecomposed2() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",


@@ -20,8 +20,6 @@ package org.apache.lucene.ant;
import java.io.File;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
@@ -31,13 +29,13 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.types.FileSet;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test cases for index task
*
*/
public class IndexTaskTest extends TestCase {
public class IndexTaskTest extends LuceneTestCase {
private final static String docHandler =
"org.apache.lucene.ant.FileExtensionDocumentHandler";
@ -55,7 +53,8 @@ public class IndexTaskTest extends TestCase {
*@exception IOException Description of Exception
*/
@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
Project project = new Project();
IndexTask task = new IndexTask();
@@ -71,12 +70,12 @@ public class IndexTaskTest extends TestCase {
dir = FSDirectory.open(indexDir);
searcher = new IndexSearcher(dir, true);
analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
}
public void testSearch() throws Exception {
Query query = new QueryParser(Version.LUCENE_CURRENT, "contents",analyzer).parse("test");
Query query = new QueryParser(TEST_VERSION_CURRENT, "contents",analyzer).parse("test");
int numHits = searcher.search(query, null, 1000).totalHits;
@@ -88,9 +87,10 @@ public class IndexTaskTest extends TestCase {
* TODO: remove indexDir?
*/
@Override
public void tearDown() throws IOException {
protected void tearDown() throws Exception {
searcher.close();
dir.close();
super.tearDown();
}
}
