LUCENE-2285: Code cleanups to remove compiler warnings in Eclipse.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@917019 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2010-02-27 19:14:01 +00:00
parent e358c3f2dd
commit efb74380fd
356 changed files with 1280 additions and 1731 deletions

View File

@@ -187,6 +187,8 @@ Optimizations
 * LUCENE-2195: Speedup CharArraySet if set is empty.
   (Simon Willnauer via Robert Muir)
+
+* LUCENE-2285: Code cleanup. (Shai Erera via Uwe Schindler)

 Build

 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation
@@ -209,9 +211,10 @@ Test Cases
 * LUCENE-2170: Fix thread starvation problems. (Uwe Schindler)

-* LUCENE-2248, LUCENE-2251: Refactor tests to not use Version.LUCENE_CURRENT,
-  but instead use a global static value from LuceneTestCase(J4), that
-  contains the release version. (Uwe Schindler, Simon Willnauer)
+* LUCENE-2248, LUCENE-2251, LUCENE-2285: Refactor tests to not use
+  Version.LUCENE_CURRENT, but instead use a global static value
+  from LuceneTestCase(J4), that contains the release version.
+  (Uwe Schindler, Simon Willnauer, Shai Erera)

 ================== Release 2.9.2 / 3.0.1 2010-02-26 ====================

View File

@@ -25,7 +25,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
@@ -162,14 +161,16 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
 this(matchVersion, WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT));
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from an {@link ArabicLetterTokenizer} filtered with
-* {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter},
-* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided
-* and {@link ArabicStemFilter}.
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link ArabicLetterTokenizer} filtered with
+* {@link LowerCaseFilter}, {@link StopFilter},
+* {@link ArabicNormalizationFilter}, {@link KeywordMarkerTokenFilter}
+* if a stem exclusion set is provided and {@link ArabicStemFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,
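The same cleanup recurs across the analyzer sources below: an import that existed only so a {@link TokenStreamComponents} tag would resolve is reported as unused by the Eclipse compiler, so the import is dropped and the fully qualified name is spelled out inside the Javadoc tag instead. A minimal sketch of the pattern, using a hypothetical class that is not part of Lucene:

    /**
     * Hypothetical example class, only to illustrate the Javadoc-link cleanup.
     * The nested type is referenced by its fully qualified name inside the
     * {@link} tag, so no import is needed purely for documentation and the
     * compiler has nothing to report as unused.
     */
    public final class JavadocLinkExample {
      /**
       * Creates
       * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
       * used to tokenize all the text in the provided {@link java.io.Reader}.
       */
      public void createComponents() {
        // intentionally empty; only the Javadoc above matters for this sketch
      }
    }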

View File

@@ -24,7 +24,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
@@ -119,13 +118,16 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
 matchVersion, stemExclusionSet));
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link BulgarianStemFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link BulgarianStemFilter}.
 */
 @Override
 public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@@ -29,7 +29,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
@@ -191,12 +190,16 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
 excltable = WordlistLoader.getWordSet( exclusionlist );
 setPreviousTokenStream(null); // force a new stemmer to be created
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer} filtered with
-* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
-* {@link BrazilianStemFilter}.
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
+* , and {@link BrazilianStemFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;

View File

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.cn;
 import java.io.Reader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
@@ -35,11 +34,13 @@ import org.apache.lucene.analysis.Tokenizer;
 public final class ChineseAnalyzer extends ReusableAnalyzerBase {
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the
-* provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a
-* {@link ChineseTokenizer} filtered with {@link ChineseFilter}
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link ChineseTokenizer} filtered with
+* {@link ChineseFilter}
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -113,7 +113,7 @@ public class HyphenationCompoundWordTokenFilter extends
 * strings.
 */
 public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-HyphenationTree hyphenator, Set dictionary) {
+HyphenationTree hyphenator, Set<?> dictionary) {
 this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
 DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
 }
@@ -145,7 +145,7 @@ public class HyphenationCompoundWordTokenFilter extends
 * Add only the longest matching subword to the stream
 */
 public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
-HyphenationTree hyphenator, Set dictionary, int minWordSize,
+HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
 int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
 super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
 onlyLongestMatch);
@@ -201,7 +201,7 @@ public class HyphenationCompoundWordTokenFilter extends
 */
 @Deprecated
 public HyphenationCompoundWordTokenFilter(TokenStream input,
-HyphenationTree hyphenator, Set dictionary) {
+HyphenationTree hyphenator, Set<?> dictionary) {
 this(Version.LUCENE_30, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
 DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
 }
@@ -223,7 +223,7 @@ public class HyphenationCompoundWordTokenFilter extends
 */
 @Deprecated
 public HyphenationCompoundWordTokenFilter(TokenStream input,
-HyphenationTree hyphenator, Set dictionary, int minWordSize,
+HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
 int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
 super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
 onlyLongestMatch);
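Replacing the raw Set parameter with Set<?> is what removes the raw-type warning here while still letting callers pass a set of any element type. A small, self-contained sketch of the idea (class and field names are invented, not the actual filter):

    import java.util.Set;

    class DictionaryHolder {
      private final Set<?> dictionary;

      // A raw 'Set' parameter would draw a raw-type warning from Eclipse or
      // javac -Xlint; the unbounded wildcard accepts any parameterization
      // while keeping reads type checked.
      DictionaryHolder(Set<?> dictionary) {
        this.dictionary = dictionary;
      }

      boolean contains(Object word) {
        return dictionary.contains(word); // reading through '?' is fine
      }
    }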

View File

@@ -83,7 +83,7 @@ public class CharVector implements Cloneable, Serializable {
 @Override
 public Object clone() {
-CharVector cv = new CharVector((char[]) array.clone(), blockSize);
+CharVector cv = new CharVector(array.clone(), blockSize);
 cv.n = this.n;
 return cv;
 }
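The dropped cast works because array clone() has had a covariant return type since Java 5: cloning a char[] already yields a char[]. A tiny demo of the behavior (hypothetical class name):

    public class ArrayCloneDemo {
      public static void main(String[] args) {
        char[] original = {'a', 'b', 'c'};
        // clone() on a char[] is typed char[] since Java 5, so the old
        // (char[]) cast was redundant and only attracted a warning.
        char[] copy = original.clone();
        System.out.println(copy != original && copy[1] == 'b'); // prints true
      }
    }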

View File

@@ -26,11 +26,6 @@ public class Hyphenation {
 private int[] hyphenPoints;
-/**
- * number of hyphenation points in word
- */
-private int len;
 /**
 * rawWord as made of alternating strings and {@link Hyphen Hyphen} instances
 */

View File

@@ -44,7 +44,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
 /**
 * This map stores hyphenation exceptions
 */
-protected HashMap<String,ArrayList> stoplist;
+protected HashMap<String,ArrayList<Object>> stoplist;
 /**
 * This map stores the character classes
@@ -57,7 +57,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
 private transient TernaryTree ivalues;
 public HyphenationTree() {
-stoplist = new HashMap<String,ArrayList>(23); // usually a small table
+stoplist = new HashMap<String,ArrayList<Object>>(23); // usually a small table
 classmap = new TernaryTree();
 vspace = new ByteVector();
 vspace.alloc(1); // this reserves index 0, which we don't use
@@ -363,7 +363,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
 if (stoplist.containsKey(sw)) {
 // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
 // null)
-ArrayList hw = stoplist.get(sw);
+ArrayList<Object> hw = stoplist.get(sw);
 int j = 0;
 for (i = 0; i < hw.size(); i++) {
 Object o = hw.get(i);
@@ -443,7 +443,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
 * @param hyphenatedword a vector of alternating strings and
 * {@link Hyphen hyphen} objects.
 */
-public void addException(String word, ArrayList hyphenatedword) {
+public void addException(String word, ArrayList<Object> hyphenatedword) {
 stoplist.put(word, hyphenatedword);
 }

View File

@@ -42,7 +42,7 @@ public interface PatternConsumer {
 * his own hyphenation. A hyphenatedword is a vector of alternating String's
 * and {@link Hyphen Hyphen} instances
 */
-void addException(String word, ArrayList hyphenatedword);
+void addException(String word, ArrayList<Object> hyphenatedword);
 /**
 * Add hyphenation patterns.

View File

@@ -51,7 +51,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 StringBuilder token;
-ArrayList exception;
+ArrayList<Object> exception;
 char hyphenChar;
@@ -199,8 +199,8 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 return pat.toString();
 }
-protected ArrayList normalizeException(ArrayList ex) {
-ArrayList res = new ArrayList();
+protected ArrayList<Object> normalizeException(ArrayList<?> ex) {
+ArrayList<Object> res = new ArrayList<Object>();
 for (int i = 0; i < ex.size(); i++) {
 Object item = ex.get(i);
 if (item instanceof String) {
@@ -230,7 +230,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 return res;
 }
-protected String getExceptionWord(ArrayList ex) {
+protected String getExceptionWord(ArrayList<?> ex) {
 StringBuilder res = new StringBuilder();
 for (int i = 0; i < ex.size(); i++) {
 Object item = ex.get(i);
@@ -291,7 +291,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 currElement = ELEM_PATTERNS;
 } else if (local.equals("exceptions")) {
 currElement = ELEM_EXCEPTIONS;
-exception = new ArrayList();
+exception = new ArrayList<Object>();
 } else if (local.equals("hyphen")) {
 if (token.length() > 0) {
 exception.add(token.toString());
@@ -308,6 +308,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 * java.lang.String, java.lang.String)
 */
 @Override
+@SuppressWarnings("unchecked")
 public void endElement(String uri, String local, String raw) {
 if (token.length() > 0) {
@@ -344,6 +345,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 /**
 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
 */
+@SuppressWarnings("unchecked")
 @Override
 public void characters(char ch[], int start, int length) {
 StringBuffer chars = new StringBuffer(length);
@@ -428,7 +430,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
 System.out.println("class: " + c);
 }
-public void addException(String w, ArrayList e) {
+public void addException(String w, ArrayList<Object> e) {
 System.out.println("exception: " + w + " : " + e.toString());
 }
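Where the SAX handler still has to juggle untyped data, the remaining unchecked conversions are confined to individual methods with @SuppressWarnings("unchecked") instead of leaving the whole class raw. A hedged sketch of that localization, with invented names:

    import java.util.ArrayList;

    class ExceptionListHolder {
      private ArrayList<Object> exception = new ArrayList<Object>();

      // The unchecked cast is deliberately limited to this one annotated
      // method, so the field and every other caller stay fully generic.
      @SuppressWarnings("unchecked")
      void adoptParsedValue(Object parsed) {
        this.exception = (ArrayList<Object>) parsed; // unchecked cast, suppressed
      }
    }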

View File

@@ -351,10 +351,10 @@ public class TernaryTree implements Cloneable, Serializable {
 @Override
 public Object clone() {
 TernaryTree t = new TernaryTree();
-t.lo = (char[]) this.lo.clone();
-t.hi = (char[]) this.hi.clone();
-t.eq = (char[]) this.eq.clone();
-t.sc = (char[]) this.sc.clone();
+t.lo = this.lo.clone();
+t.hi = this.hi.clone();
+t.eq = this.eq.clone();
+t.sc = this.sc.clone();
 t.kv = (CharVector) this.kv.clone();
 t.root = this.root;
 t.freenode = this.freenode;

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cz;
 */
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
@@ -216,16 +215,20 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
 stoptable = Collections.emptySet();
 }
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
-* {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, and {@link CzechStemFilter} (only if version is
-* >= LUCENE_31). If a version is >= LUCENE_31 and a stem exclusion set
-* is provided via {@link #CzechAnalyzer(Version, Set, Set)} a
-* {@link KeywordMarkerTokenFilter} is added before {@link CzechStemFilter}.
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
+* a version is >= LUCENE_31 and a stem exclusion set is provided via
+* {@link #CzechAnalyzer(Version, Set, Set)} a
+* {@link KeywordMarkerTokenFilter} is added before
+* {@link CzechStemFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -29,7 +29,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
@@ -224,14 +223,15 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the
-* provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a
-* {@link StandardTokenizer} filtered with {@link StandardFilter},
-* {@link LowerCaseFilter}, {@link StopFilter},
-* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
-* {@link SnowballFilter}
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided, and {@link SnowballFilter}
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.el;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
@@ -122,12 +121,14 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the
-* provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a
-* {@link StandardTokenizer} filtered with
-* {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link GreekLowerCaseFilter}, {@link StandardFilter} and
+* {@link StopFilter}
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -90,13 +89,16 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link PorterStemFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link PorterStemFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -25,7 +25,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
@@ -136,12 +135,13 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
-* {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a {@link ArabicLetterTokenizer}
-* filtered with {@link LowerCaseFilter},
-* {@link ArabicNormalizationFilter},
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link ArabicLetterTokenizer} filtered with
+* {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
 * {@link PersianNormalizationFilter} and Persian Stop words
 */
 @Override

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.fr;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
@@ -225,14 +224,16 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
-* {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link ElisionFilter},
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link ElisionFilter},
 * {@link LowerCaseFilter}, {@link StopFilter},
-* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
-* and {@link SnowballFilter}
+* {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided, and {@link SnowballFilter}
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -22,7 +22,6 @@ import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
@@ -106,15 +105,16 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
-* {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a {@link IndicTokenizer}
-* filtered with {@link LowerCaseFilter},
-* {@link IndicNormalizationFilter},
-* {@link HindiNormalizationFilter},
-* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
-* {@link HindiStemFilter}, and Hindi Stop words
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link IndicTokenizer} filtered with
+* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
+* {@link HindiNormalizationFilter}, {@link KeywordMarkerTokenFilter}
+* if a stem exclusion set is provided, {@link HindiStemFilter}, and
+* Hindi Stop words
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -311,7 +311,7 @@ public final class PatternAnalyzer extends Analyzer {
 return new String(output, 0, len);
 } finally {
-if (input != null) input.close();
+input.close();
 }
 }
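Dropping the null guard is safe because the reader has already been dereferenced inside the try block, so it cannot be null by the time the finally clause runs; Eclipse reports such guards as redundant null checks. A minimal sketch of the cleaned-up shape (a hypothetical helper, not the actual PatternAnalyzer code):

    import java.io.IOException;
    import java.io.Reader;

    final class ReadAll {
      static String toString(Reader input) throws IOException {
        try {
          StringBuilder sb = new StringBuilder();
          char[] buf = new char[256];
          int n;
          while ((n = input.read(buf)) >= 0) {   // input is dereferenced here
            sb.append(buf, 0, n);
          }
          return sb.toString();
        } finally {
          input.close();   // no 'if (input != null)' needed at this point
        }
      }
    }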

View File

@@ -124,7 +124,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
 if (!input.incrementToken()) {
 return false;
 } else {
-curTermBuffer = (char[]) termAtt.termBuffer().clone();
+curTermBuffer = termAtt.termBuffer().clone();
 curTermLength = termAtt.termLength();
 curGramSize = minGram;
 tokStart = offsetAtt.startOffset();

View File

@@ -79,7 +79,7 @@ public final class NGramTokenFilter extends TokenFilter {
 if (!input.incrementToken()) {
 return false;
 } else {
-curTermBuffer = (char[]) termAtt.termBuffer().clone();
+curTermBuffer = termAtt.termBuffer().clone();
 curTermLength = termAtt.termLength();
 curGramSize = minGram;
 curPos = 0;

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -192,7 +192,7 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
 * if there stopwords, it is a StopFilter around wrapped.
 */
 TokenStream withStopFilter;
-};
+}
 @Override
 public TokenStream reusableTokenStream(String fieldName, Reader reader)
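The only change here is dropping the trailing semicolon after the inner class body: "};" leaves an extra empty declaration, which Eclipse flags as an unnecessary semicolon. Illustration with a made-up outer/inner class:

    class Outer {
      class SavedStreams {
        Object wrapped;
        Object withStopFilter;
      }   // a trailing ';' after this brace would be a superfluous empty declaration
    }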

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -110,13 +109,16 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
 }
 /**
-* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-* {@link Reader}.
+* Creates a
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* which tokenizes all the text in the provided {@link Reader}.
 *
-* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-* exclusion set is provided and {@link SnowballFilter}.
+* @return A
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from an {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided and {@link SnowballFilter}.
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -26,7 +26,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -161,14 +160,15 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
 }
 /**
-* Creates {@link TokenStreamComponents} used to tokenize all the text in the
-* provided {@link Reader}.
+* Creates
+* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* used to tokenize all the text in the provided {@link Reader}.
 *
-* @return {@link TokenStreamComponents} built from a
-* {@link StandardTokenizer} filtered with {@link StandardFilter},
-* {@link LowerCaseFilter}, {@link StopFilter},
-* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
-* and {@link SnowballFilter}
+* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+* built from a {@link StandardTokenizer} filtered with
+* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+* provided, and {@link SnowballFilter}
 */
 @Override
 protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -26,7 +26,7 @@ package org.apache.lucene.analysis.ru;
 class RussianStemmer
 {
 // positions of RV, R1 and R2 respectively
-private int RV, R1, R2;
+private int RV, /*R1,*/ R2;
 // letters (currently unused letters are commented out)
 private final static char A = '\u0430';
@@ -263,11 +263,7 @@ class RussianStemmer
 if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
 return false;
 // if adjective ending was found, try for participle ending.
-// variable r is unused, we are just interested in the side effect of
-// findAndRemoveEnding():
-boolean r =
-findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
-||
-findAndRemoveEnding(stemmingZone, participleEndings2);
+if (!findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
+findAndRemoveEnding(stemmingZone, participleEndings2);
 return true;
 }
@@ -391,7 +387,7 @@ class RussianStemmer
 private void markPositions(String word)
 {
 RV = 0;
-R1 = 0;
+// R1 = 0;
 R2 = 0;
 int i = 0;
 // find RV
@@ -409,7 +405,7 @@ class RussianStemmer
 }
 if (word.length() - 1 < ++i)
 return; // R1 zone is empty
-R1 = i;
+// R1 = i;
 // find R2
 while (word.length() > i && !isVowel(word.charAt(i)))
 {
@@ -532,13 +528,9 @@ class RussianStemmer
 if (!perfectiveGerund(stemmingZone))
 {
 reflexive(stemmingZone);
-// variable r is unused, we are just interested in the flow that gets
-// created by logical expression: apply adjectival(); if that fails,
-// apply verb() etc
-boolean r =
-adjectival(stemmingZone)
-|| verb(stemmingZone)
-|| noun(stemmingZone);
+if (!adjectival(stemmingZone))
+if (!verb(stemmingZone))
+noun(stemmingZone);
 }
 // Step 2
 removeI(stemmingZone);
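Both stemmer rewrites replace a boolean local that existed only to force short-circuit evaluation with explicit if statements, keeping the "first successful step wins" flow while removing the unused-variable warning. A self-contained sketch of the pattern (the predicates are stand-ins, not the real stemmer methods):

    final class ShortCircuitRewrite {
      // Stand-ins for the stemmer's adjectival(), verb() and noun() steps.
      static boolean adjectival(StringBuilder zone) { return zone.length() > 8; }
      static boolean verb(StringBuilder zone)       { return zone.length() > 5; }
      static boolean noun(StringBuilder zone)       { return zone.length() > 2; }

      static void stemStep(StringBuilder zone) {
        // Old: boolean r = adjectival(zone) || verb(zone) || noun(zone);
        //      (r was never read; only the side effects mattered)
        // New: the same short-circuit flow, made explicit.
        if (!adjectival(zone)) {
          if (!verb(zone)) {
            noun(zone);
          }
        }
      }
    }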

View File

@@ -391,8 +391,8 @@ public final class ShingleFilter extends TokenFilter {
 }
 /**
-* {@see #advance()}
 * @return the current value.
+* @see #advance()
 */
 public int getValue() {
 return value;

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
 import org.apache.lucene.analysis.payloads.PayloadHelper;
-import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
 import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.sinks;
 import java.text.DateFormat;
 import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Date;
 import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter;
@@ -42,7 +41,7 @@ public class DateRecognizerSinkFilter extends SinkFilter {
 * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
 */
 public DateRecognizerSinkFilter() {
-this(SimpleDateFormat.getDateInstance());
+this(DateFormat.getDateInstance());
 }
 public DateRecognizerSinkFilter(DateFormat dateFormat) {
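getDateInstance() is a static factory declared on java.text.DateFormat; invoking it through the SimpleDateFormat subclass works but is an indirect static access that Eclipse warns about, and it also forced an otherwise unneeded import. A quick demo of the preferred call:

    import java.text.DateFormat;
    import java.util.Date;

    public class DateFormatDemo {
      public static void main(String[] args) {
        // Call the static factory on the class that declares it.
        DateFormat df = DateFormat.getDateInstance();
        System.out.println(df.format(new Date()));
      }
    }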

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -106,13 +105,16 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   }
   /**
-   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-   * {@link Reader}.
+   * Creates a
+   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
    *
-   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
-   *         exclusion set is provided and {@link SnowballFilter}.
+   * @return A
+   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from an {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+   *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName,
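For readers who want to see the component chain described in this javadoc in action, a small hedged usage sketch (class name and sample text are illustrative; it assumes the 3.x-era TermAttribute API, not code from this patch):

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class SwedishAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new SwedishAnalyzer(Version.LUCENE_31);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("Jordens alla jordar"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // tokens after StandardFilter, LowerCaseFilter, StopFilter and SnowballFilter
      System.out.println(term.term());
    }
    ts.close();
  }
}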

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
 import java.io.Reader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.StopFilter;
@@ -45,12 +44,14 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
   }
   /**
-   * Creates {@link TokenStreamComponents} used to tokenize all the text in the
-   * provided {@link Reader}.
+   * Creates
+   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link TokenStreamComponents} built from a
-   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
-   *         {@link ThaiWordFilter}, and {@link StopFilter}
+   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from a {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link ThaiWordFilter}, and
+   *         {@link StopFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName,

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -109,11 +108,14 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   }
   /**
-   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
-   * {@link Reader}.
+   * Creates a
+   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
    *
-   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
+   * @return A
+   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from an {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link TurkishLowerCaseFilter},
    *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
    *         exclusion set is provided and {@link SnowballFilter}.
    */

View File

@@ -24,7 +24,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.util.Version;
 /**
  * Test the Arabic Analyzer
@@ -35,14 +34,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ArabicAnalyzer(Version.LUCENE_CURRENT);
+    new ArabicAnalyzer(TEST_VERSION_CURRENT);
   }
   /**
    * Some simple tests showing some features of the analyzer, how some regular forms will conflate
    */
   public void testBasicFeatures() throws Exception {
-    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
+    ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
     assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
@@ -63,7 +62,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
    * Simple tests to show things are getting reset correctly, etc.
    */
   public void testReusableTokenStream() throws Exception {
-    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
+    ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
     assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
   }
@@ -72,7 +71,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
    * Non-arabic text gets treated in a similar way as SimpleAnalyzer.
    */
   public void testEnglishInput() throws Exception {
-    assertAnalyzesTo(new ArabicAnalyzer(Version.LUCENE_CURRENT), "English text.", new String[] {
+    assertAnalyzesTo(new ArabicAnalyzer(TEST_VERSION_CURRENT), "English text.", new String[] {
         "english", "text" });
   }
@@ -82,7 +81,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   public void testCustomStopwords() throws Exception {
     Set<String> set = new HashSet<String>();
     Collections.addAll(set, "the", "and", "a");
-    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, set);
+    ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }
@@ -90,12 +89,12 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   public void testWithStemExclusionSet() throws IOException {
     Set<String> set = new HashSet<String>();
     set.add("ساهدهات");
-    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
+    ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
     assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
-    a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
+    a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
     assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
   }
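The mechanical substitution above (Version.LUCENE_CURRENT to TEST_VERSION_CURRENT) relies on a constant inherited from the test base class, so the targeted release version is bumped in one place instead of in every test. A hedged sketch of such a declaration follows; the real field lives in LuceneTestCase / LuceneTestCase.J4, and the exact value assigned here is an assumption, not taken from this patch:

import junit.framework.TestCase;
import org.apache.lucene.util.Version;

// Sketch only: the actual constant is defined by Lucene's test framework.
public abstract class LuceneTestCaseSketch extends TestCase {
  /** One place to state the release version the tests should exercise. */
  public static final Version TEST_VERSION_CURRENT = Version.LUCENE_31;
}

A test extending the base class can then simply write new ArabicAnalyzer(TEST_VERSION_CURRENT), as in the hunks above, and no longer needs the org.apache.lucene.util.Version import.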

View File

@@ -21,11 +21,9 @@ import java.io.IOException;
 import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 /**
  * Test the Arabic Normalization Filter
- *
  */
 public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
@@ -86,7 +84,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
   }
   private void check(final String input, final String expected) throws IOException {
-    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
+    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     ArabicNormalizationFilter filter = new ArabicNormalizationFilter(tokenStream);
     assertTokenStreamContents(filter, new String[]{expected});
   }

View File

@@ -23,7 +23,6 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
-import org.apache.lucene.util.Version;
 /**
  * Test the Arabic Normalization Filter
@@ -116,16 +115,16 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
   }
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("ساهدهات");
-    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader("ساهدهات"));
+    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
     ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerTokenFilter(tokenStream, set));
     assertTokenStreamContents(filter, new String[]{"ساهدهات"});
   }
   private void check(final String input, final String expected) throws IOException {
-    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
+    ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
     assertTokenStreamContents(filter, new String[]{expected});
   }

View File

@@ -34,23 +34,23 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    new BulgarianAnalyzer(TEST_VERSION_CURRENT);
   }
   public void testStopwords() throws IOException {
-    Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"});
   }
   public void testCustomStopwords() throws IOException {
-    Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, Collections
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
         .emptySet());
     assertAnalyzesTo(a, "Как се казваш?",
         new String[] {"как", "се", "казваш"});
   }
   public void testReusableTokenStream() throws IOException {
-    Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesToReuse(a, "документи", new String[] {"документ"});
     assertAnalyzesToReuse(a, "документ", new String[] {"документ"});
   }
@@ -59,7 +59,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
    * Test some examples from the paper
    */
   public void testBasicExamples() throws IOException {
-    Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "енергийни кризи", new String[] {"енергийн", "криз"});
     assertAnalyzesTo(a, "Атомната енергия", new String[] {"атомн", "енерг"});
@@ -72,7 +72,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
   public void testWithStemExclusionSet() throws IOException {
     CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
     set.add("строеве");
-    Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
   }
 }

View File

@@ -35,7 +35,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
    * common (and some rare) plural pattern is listed.
    */
   public void testMasculineNouns() throws IOException {
-    BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     // -и pattern
     assertAnalyzesTo(a, "град", new String[] {"град"});
@@ -101,7 +101,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
    * Test showing how feminine noun forms conflate
    */
   public void testFeminineNouns() throws IOException {
-    BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "вест", new String[] {"вест"});
     assertAnalyzesTo(a, "вестта", new String[] {"вест"});
@@ -114,7 +114,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
    * plural pattern is listed
    */
   public void testNeuterNouns() throws IOException {
-    BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     // -а pattern
     assertAnalyzesTo(a, "дърво", new String[] {"дърв"});
@@ -142,7 +142,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
    * Test showing how adjectival forms conflate
    */
   public void testAdjectives() throws IOException {
-    BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "красив", new String[] {"красив"});
     assertAnalyzesTo(a, "красивия", new String[] {"красив"});
     assertAnalyzesTo(a, "красивият", new String[] {"красив"});
@@ -158,7 +158,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
    * Test some exceptional rules, implemented as rewrites.
    */
   public void testExceptions() throws IOException {
-    BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
+    BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
     // ци -> к
     assertAnalyzesTo(a, "собственик", new String[] {"собственик"});
@@ -215,7 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
     set.add("строеве");
-    WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
+    WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
         new StringReader("строевете строеве"));
     BulgarianStemFilter filter = new BulgarianStemFilter(

View File

@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseTokenizer;
-import org.apache.lucene.util.Version;
 /**
  * Test the Brazilian Stem Filter, which only modifies the term text.
@@ -128,7 +127,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
   }
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
     checkReuse(a, "boa", "boa");
     checkReuse(a, "boainain", "boainain");
     checkReuse(a, "boas", "boas");
@@ -136,35 +135,35 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
   }
   public void testStemExclusionTable() throws Exception {
-    BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
+    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
     a.setStemExclusionTable(new String[] { "quintessência" });
     checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
   }
   public void testStemExclusionTableBWCompat() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("Brasília");
     BrazilianStemFilter filter = new BrazilianStemFilter(
-        new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader("Brasília Brasilia")), set);
+        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
     assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
   }
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("Brasília");
     BrazilianStemFilter filter = new BrazilianStemFilter(
-        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
             "Brasília Brasilia")), set));
     assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
   }
   public void testWithKeywordAttributeAndExclusionTable() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("Brasília");
-    CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set1.add("Brasilia");
     BrazilianStemFilter filter = new BrazilianStemFilter(
-        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Brasília Brasilia")), set), set1);
     assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
   }
@@ -174,14 +173,14 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
    * when using reusable token streams.
    */
   public void testExclusionTableReuse() throws Exception {
-    BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
+    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
     checkReuse(a, "quintessência", "quintessente");
     a.setStemExclusionTable(new String[] { "quintessência" });
     checkReuse(a, "quintessência", "quintessência");
   }
   private void check(final String input, final String expected) throws Exception {
-    checkOneTerm(new BrazilianAnalyzer(Version.LUCENE_CURRENT), input, expected);
+    checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected);
   }
   private void checkReuse(Analyzer a, String input, String expected) throws Exception {

View File

@@ -21,7 +21,6 @@ import java.io.IOException;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.Version;
 public class TestCJKTokenizer extends BaseTokenStreamTestCase {
@@ -42,7 +41,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
   }
   public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
-    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
     String terms[] = new String[out_tokens.length];
     int startOffsets[] = new int[out_tokens.length];
     int endOffsets[] = new int[out_tokens.length];
@@ -57,7 +56,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
   }
   public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
-    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
     String terms[] = new String[out_tokens.length];
     int startOffsets[] = new int[out_tokens.length];
     int endOffsets[] = new int[out_tokens.length];
@@ -213,13 +212,13 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
   }
   public void testTokenStream() throws Exception {
-    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
         new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
   }
   public void testReusableTokenStream() throws Exception {
-    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
     String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
     TestToken[] out_tokens = {

View File

@@ -28,17 +28,11 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.util.Version;
 public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
   static final File dataDir = new File(System.getProperty("dataDir", "./bin"));
   static final File testFile = new File(dataDir, "org/apache/lucene/analysis/compound/da_UTF8.xml");
-  @Override
-  protected void setUp() throws Exception {
-    super.setUp();
-  }
   public void testHyphenationCompoundWordsDA() throws Exception {
     String[] dict = { "læse", "hest" };
@@ -47,8 +41,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
     HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
         .getHyphenationTree(reader);
-    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
-        new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
             "min veninde som er lidt af en læsehest")), hyphenator,
         dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
@@ -67,8 +61,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         .getHyphenationTree(reader);
     // the word basket will not be added due to the longest match option
-    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
-        new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "basketballkurv")), hyphenator, dict,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
@@ -84,8 +78,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
         "Sko", "Vind", "Rute", "Torkare", "Blad" };
-    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
-        new WhitespaceTokenizer(Version.LUCENE_CURRENT,
+    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT,
            new StringReader(
                "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
         dict);
@@ -113,8 +107,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
         "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
-    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
-        new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
+    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
         dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
@@ -129,9 +123,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
     String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
         "Aufgabe", "Überwachung" };
-    Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
        "Rindfleischüberwachungsgesetz"));
-    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
+    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
        wsTokenizer, dict,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
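As a small usage illustration of the dictionary-based decompounding exercised in the hunks above, here is a hedged standalone sketch; the constructor signatures are the ones visible in this diff, but the sample word and dictionary are illustrative and the printed result is not asserted:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class DecompoundDemo {
  public static void main(String[] args) throws Exception {
    String[] dict = { "Bil", "Dörr" };
    TokenStream ts = new DictionaryCompoundWordTokenFilter(Version.LUCENE_31,
        new WhitespaceTokenizer(Version.LUCENE_31, new StringReader("Bildörr")), dict);
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      // prints the original compound followed by any dictionary subwords found in it
      System.out.println(term.term());
    }
  }
}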

View File

@@ -48,7 +48,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   }
   public void testStopWord() throws Exception {
-    assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem",
+    assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem",
        new String[] { "mluvim", "voln" });
   }
@@ -63,7 +63,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   }
   public void testReusableTokenStream() throws Exception {
-    Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
     assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
   }
@@ -112,9 +112,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   }
   public void testWithStemExclusionSet() throws IOException{
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("hole");
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
   }
 }

View File

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.util.Version;
 /**
  * Test the Czech Stemmer.
@@ -38,7 +37,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test showing how masculine noun forms conflate
    */
   public void testMasculineNouns() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     /* animate ending with a hard consonant */
     assertAnalyzesTo(cz, "pán", new String[] { "pán" });
@@ -106,7 +105,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test showing how feminine noun forms conflate
    */
   public void testFeminineNouns() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     /* ending with hard consonant */
     assertAnalyzesTo(cz, "kost", new String[] { "kost" });
@@ -150,7 +149,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test showing how neuter noun forms conflate
    */
   public void testNeuterNouns() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     /* ending with o */
     assertAnalyzesTo(cz, "město", new String[] { "měst" });
@@ -193,7 +192,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test showing how adjectival forms conflate
    */
   public void testAdjectives() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     /* ending with ý/á/é */
     assertAnalyzesTo(cz, "mladý", new String[] { "mlad" });
@@ -221,7 +220,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test some possessive suffixes
    */
   public void testPossessive() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(cz, "Karlův", new String[] { "karl" });
     assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" });
   }
@@ -230,7 +229,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test some exceptional rules, implemented as rewrites.
    */
   public void testExceptions() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     /* rewrite of št -> sk */
     assertAnalyzesTo(cz, "český", new String[] { "česk" });
@@ -270,16 +269,16 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
    * Test that very short words are not stemmed.
    */
   public void testDontStem() throws IOException {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(cz, "e", new String[] { "e" });
     assertAnalyzesTo(cz, "zi", new String[] { "zi" });
   }
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("hole");
     CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
-        new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hole desek")), set));
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
     assertTokenStreamContents(filter, new String[] { "hole", "desk" });
   }

View File

@@ -23,18 +23,17 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new DanishAnalyzer(Version.LUCENE_CURRENT);
+    new DanishAnalyzer(TEST_VERSION_CURRENT);
   }
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTermReuse(a, "undersøg", "undersøg");
     checkOneTermReuse(a, "undersøgelse", "undersøg");
@@ -46,7 +45,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
   public void testExclude() throws IOException {
     Set<String> exclusionSet = new HashSet<String>();
     exclusionSet.add("undersøgelse");
-    Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT,
+    Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
        DanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "undersøgelse", "undersøgelse");
     checkOneTermReuse(a, "undersøg", "undersøg");

View File

@@ -29,38 +29,38 @@ import org.apache.lucene.util.Version;
 public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
     checkOneTermReuse(a, "Tisch", "tisch");
     checkOneTermReuse(a, "Tische", "tisch");
     checkOneTermReuse(a, "Tischen", "tisch");
   }
   public void testExclusionTableBWCompat() throws IOException {
-    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
+    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT,
        new StringReader("Fischen Trinken")));
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("fischen");
     filter.setExclusionSet(set);
     assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
   }
   public void testWithKeywordAttribute() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("fischen");
     GermanStemFilter filter = new GermanStemFilter(
-        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Fischen Trinken")), set));
     assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
   }
   public void testWithKeywordAttributeAndExclusionTable() throws IOException {
-    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("fischen");
-    CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set1.add("trinken");
     set1.add("fischen");
     GermanStemFilter filter = new GermanStemFilter(
-        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
            "Fischen Trinken")), set));
     filter.setExclusionSet(set1);
     assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
@@ -71,7 +71,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
    * when using reusable token streams.
    */
   public void testExclusionTableReuse() throws Exception {
-    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
     checkOneTermReuse(a, "tischen", "tisch");
     a.setStemExclusionTable(new String[] { "tischen" });
     checkOneTermReuse(a, "tischen", "tischen");
@@ -81,7 +81,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
    * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
    */
   public void testGermanSpecials() throws Exception {
-    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
     // a/o/u + e is equivalent to the umlaut form
     checkOneTermReuse(a, "Schaltflächen", "schaltflach");
     checkOneTermReuse(a, "Schaltflaechen", "schaltflach");
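The keyword-attribute tests above protect specific terms from stemming by marking them before the stem filter runs. The same chain, pulled out of the test harness into a hedged standalone sketch (constructors and the expected "fischen"/"trink" output are taken from the hunks above; the class name is made up):

import java.io.StringReader;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class KeywordMarkerDemo {
  public static void main(String[] args) throws Exception {
    CharArraySet protectedTerms = new CharArraySet(Version.LUCENE_31, 1, true);
    protectedTerms.add("fischen"); // marked as keyword, so the stemmer leaves it alone
    TokenStream ts = new GermanStemFilter(
        new KeywordMarkerTokenFilter(
            new LowerCaseTokenizer(Version.LUCENE_31, new StringReader("Fischen Trinken")),
            protectedTerms));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term()); // "fischen", then the stemmed "trink"
    }
  }
}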

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.Version;
 /**
  * Test the German stemmer. The stemming algorithm is known to work less
@@ -40,7 +39,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
   public void testStemming() throws Exception {
     Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
-    TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
+    TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
     // read test cases from external file:
     File dataDir = new File(System.getProperty("dataDir", "./bin"));
     File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");

View File

@@ -32,7 +32,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
    * @throws Exception in case an error occurs
    */
   public void testAnalyzer() throws Exception {
-    Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
     // Verify the correct analysis of capitals and small accented letters
     assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
         new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
@@ -48,7 +48,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
   }
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
     // Verify the correct analysis of capitals and small accented letters
     assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
         new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",

View File

@@ -23,18 +23,17 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new EnglishAnalyzer(Version.LUCENE_CURRENT);
+    new EnglishAnalyzer(TEST_VERSION_CURRENT);
   }
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTermReuse(a, "books", "book");
     checkOneTermReuse(a, "book", "book");
@@ -46,7 +45,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
   public void testExclude() throws IOException {
     Set<String> exclusionSet = new HashSet<String>();
     exclusionSet.add("books");
-    Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT,
+    Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
        EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "books", "books");
     checkOneTermReuse(a, "book", "book");

View File

@@ -23,18 +23,17 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new SpanishAnalyzer(Version.LUCENE_CURRENT);
+    new SpanishAnalyzer(TEST_VERSION_CURRENT);
   }
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTermReuse(a, "chicana", "chican");
     checkOneTermReuse(a, "chicano", "chican");
@@ -46,7 +45,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
   public void testExclude() throws IOException {
     Set<String> exclusionSet = new HashSet<String>();
     exclusionSet.add("chicano");
-    Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT,
+    Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
        SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "chicana", "chican");
     checkOneTermReuse(a, "chicano", "chicano");

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fa;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.Version;
 /**
  * Test the Persian Analyzer
@@ -31,7 +30,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new PersianAnalyzer(Version.LUCENE_CURRENT);
+    new PersianAnalyzer(TEST_VERSION_CURRENT);
   }
   /**
@@ -42,7 +41,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbs() throws Exception {
-    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
     // active present indicative
     assertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
     // active preterite indicative
@@ -118,7 +117,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbsDefective() throws Exception {
-    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
     // active present indicative
     assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
     // active preterite indicative
@@ -189,7 +188,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * nouns, removing the plural -ha.
    */
   public void testBehaviorNouns() throws Exception {
-    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
     assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
   }
@@ -199,7 +198,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * (lowercased, etc)
    */
   public void testBehaviorNonPersian() throws Exception {
-    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
   }
@@ -207,7 +206,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * Basic test ensuring that reusableTokenStream works correctly.
    */
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
     assertAnalyzesToReuse(a, "برگ‌ها", new String[] { "برگ" });
} }
@ -216,7 +215,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive. * Test that custom stopwords work, and are not case-sensitive.
*/ */
public void testCustomStopwords() throws Exception { public void testCustomStopwords() throws Exception {
PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" }); PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, new String[] { "the", "and", "a" });
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" }); "brown", "fox" });
} }


@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer; import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.util.Version;
/** /**
* Test the Persian Normalization Filter * Test the Persian Normalization Filter
@ -55,7 +54,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
} }
private void check(final String input, final String expected) throws IOException { private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT,
new StringReader(input)); new StringReader(input));
PersianNormalizationFilter filter = new PersianNormalizationFilter( PersianNormalizationFilter filter = new PersianNormalizationFilter(
tokenStream); tokenStream);


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestFinnishAnalyzer extends BaseTokenStreamTestCase { public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new FinnishAnalyzer(Version.LUCENE_CURRENT); new FinnishAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäj"); checkOneTermReuse(a, "edeltäjistään", "edeltäj");
@ -46,7 +45,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("edeltäjistään"); exclusionSet.add("edeltäjistään");
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
FinnishAnalyzer.getDefaultStopSet(), exclusionSet); FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään"); checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");


@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/** /**
* *
@ -38,19 +37,19 @@ public class TestElision extends BaseTokenStreamTestCase {
public void testElision() throws Exception { public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin."; String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test)); Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
Set articles = new HashSet(); Set<String> articles = new HashSet<String>();
articles.add("l"); articles.add("l");
articles.add("M"); articles.add("M");
TokenFilter filter = new ElisionFilter(Version.LUCENE_CURRENT, tokenizer, articles); TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
List tas = filtre(filter); List<String> tas = filter(filter);
assertEquals("embrouille", tas.get(4)); assertEquals("embrouille", tas.get(4));
assertEquals("O'brian", tas.get(6)); assertEquals("O'brian", tas.get(6));
assertEquals("enfin", tas.get(7)); assertEquals("enfin", tas.get(7));
} }
private List filtre(TokenFilter filter) throws IOException { private List<String> filter(TokenFilter filter) throws IOException {
List tas = new ArrayList(); List<String> tas = new ArrayList<String>();
TermAttribute termAtt = filter.getAttribute(TermAttribute.class); TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
while (filter.incrementToken()) { while (filter.incrementToken()) {
tas.add(termAtt.term()); tas.add(termAtt.term());


@ -32,7 +32,7 @@ import org.apache.lucene.util.Version;
public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
public void testAnalyzer() throws Exception { public void testAnalyzer() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(fa, "", new String[] { assertAnalyzesTo(fa, "", new String[] {
}); });
@ -204,7 +204,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
} }
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
// stopwords // stopwords
assertAnalyzesToReuse( assertAnalyzesToReuse(
fa, fa,
@ -229,27 +229,27 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams. * when using reusable token streams.
*/ */
public void testExclusionTableReuse() throws Exception { public void testExclusionTableReuse() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" }); assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
fa.setStemExclusionTable(new String[] { "habitable" }); fa.setStemExclusionTable(new String[] { "habitable" });
assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" }); assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
} }
public void testExclusionTableViaCtor() throws Exception { public void testExclusionTableViaCtor() throws Exception {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("habitable"); set.add("habitable");
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT, FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
CharArraySet.EMPTY_SET, set); CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable", assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
"chist" }); "chist" });
fa = new FrenchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set); fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" }); "chist" });
} }
public void testElision() throws Exception { public void testElision() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" }); assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
} }


@ -5,7 +5,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,11 +30,11 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new HindiAnalyzer(Version.LUCENE_CURRENT); new HindiAnalyzer(TEST_VERSION_CURRENT);
} }
public void testBasics() throws Exception { public void testBasics() throws Exception {
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT);
// two ways to write 'hindi' itself. // two ways to write 'hindi' itself.
checkOneTermReuse(a, "हिन्दी", "हिंद"); checkOneTermReuse(a, "हिन्दी", "हिंद");
checkOneTermReuse(a, "हिंदी", "हिंद"); checkOneTermReuse(a, "हिंदी", "हिंद");
@ -44,7 +43,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
public void testExclusionSet() throws Exception { public void testExclusionSet() throws Exception {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("हिंदी"); exclusionSet.add("हिंदी");
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
HindiAnalyzer.getDefaultStopSet(), exclusionSet); HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी"); checkOneTermReuse(a, "हिंदी", "हिंदी");
} }


@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/** /**
* Test HindiNormalizer * Test HindiNormalizer
@ -60,7 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो"); check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
} }
private void check(String input, String output) throws IOException { private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input)); new StringReader(input));
TokenFilter tf = new HindiNormalizationFilter(tokenizer); TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output }); assertTokenStreamContents(tf, new String[] { output });


@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/** /**
* Test HindiStemmer * Test HindiStemmer
@ -82,7 +81,7 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase {
} }
private void check(String input, String output) throws IOException { private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input)); new StringReader(input));
TokenFilter tf = new HindiStemFilter(tokenizer); TokenFilter tf = new HindiStemFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output }); assertTokenStreamContents(tf, new String[] { output });


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestHungarianAnalyzer extends BaseTokenStreamTestCase { public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new HungarianAnalyzer(Version.LUCENE_CURRENT); new HungarianAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "babakocsi", "babakocs"); checkOneTermReuse(a, "babakocsi", "babakocs");
checkOneTermReuse(a, "babakocsijáért", "babakocs"); checkOneTermReuse(a, "babakocsijáért", "babakocs");
@ -46,7 +45,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("babakocsi"); exclusionSet.add("babakocsi");
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
HungarianAnalyzer.getDefaultStopSet(), exclusionSet); HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "babakocsi", "babakocsi"); checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs"); checkOneTermReuse(a, "babakocsijáért", "babakocs");


@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/** /**
* Test IndicNormalizer * Test IndicNormalizer
@ -45,7 +44,7 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
} }
private void check(String input, String output) throws IOException { private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input)); new StringReader(input));
TokenFilter tf = new IndicNormalizationFilter(tokenizer); TokenFilter tf = new IndicNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output }); assertTokenStreamContents(tf, new String[] { output });


@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/** /**
* Test IndicTokenizer * Test IndicTokenizer
@ -30,7 +29,7 @@ import org.apache.lucene.util.Version;
public class TestIndicTokenizer extends BaseTokenStreamTestCase { public class TestIndicTokenizer extends BaseTokenStreamTestCase {
/** Test tokenizing Indic vowels, signs, and punctuation */ /** Test tokenizing Indic vowels, signs, and punctuation */
public void testBasics() throws IOException { public void testBasics() throws IOException {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT, TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("मुझे हिंदी का और अभ्यास करना होगा ।")); new StringReader("मुझे हिंदी का और अभ्यास करना होगा ।"));
assertTokenStreamContents(ts, assertTokenStreamContents(ts,
new String[] { "मुझे", "हिंदी", "का", "और", "अभ्यास", "करना", "होगा" }); new String[] { "मुझे", "हिंदी", "का", "और", "अभ्यास", "करना", "होगा" });
@ -38,7 +37,7 @@ public class TestIndicTokenizer extends BaseTokenStreamTestCase {
/** Test that words with format chars such as ZWJ are kept */ /** Test that words with format chars such as ZWJ are kept */
public void testFormat() throws Exception { public void testFormat() throws Exception {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT, TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("शार्‍मा शार्‍मा")); new StringReader("शार्‍मा शार्‍मा"));
assertTokenStreamContents(ts, new String[] { "शार्‍मा", "शार्‍मा" }); assertTokenStreamContents(ts, new String[] { "शार्‍मा", "शार्‍मा" });
} }


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase { public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new ItalianAnalyzer(Version.LUCENE_CURRENT); new ItalianAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "abbandonata", "abbandon"); checkOneTermReuse(a, "abbandonata", "abbandon");
checkOneTermReuse(a, "abbandonati", "abbandon"); checkOneTermReuse(a, "abbandonati", "abbandon");
@ -46,7 +45,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("abbandonata"); exclusionSet.add("abbandonata");
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
ItalianAnalyzer.getDefaultStopSet(), exclusionSet); ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "abbandonata", "abbandonata"); checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon"); checkOneTermReuse(a, "abbandonati", "abbandon");


@ -24,7 +24,6 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/** /**
* Verifies the behavior of PatternAnalyzer. * Verifies the behavior of PatternAnalyzer.
@ -37,13 +36,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/ */
public void testNonWordPattern() throws IOException { public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords // Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
false, null); false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] { check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox", "the", "abcd", "dc" }); "The", "quick", "brown", "Fox", "the", "abcd", "dc" });
// split on non-letter pattern, lowercase, english stopwords // split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET); true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] { check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox", "abcd", "dc" }); "quick", "brown", "fox", "abcd", "dc" });
@ -55,13 +54,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/ */
public void testWhitespacePattern() throws IOException { public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords // Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null); false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] { check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." }); "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
// Split on whitespace patterns, lowercase, english stopwords // Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET); true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] { check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." }); "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
@ -73,12 +72,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/ */
public void testCustomPattern() throws IOException { public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords // Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), false, null); PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here", check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
"Are", "some", "Comma", "separated", "words" }); "Are", "some", "Comma", "separated", "words" });
// split on comma, lowercase, english stopwords // split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), true, PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true,
StopAnalyzer.ENGLISH_STOP_WORDS_SET); StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here", check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
"some", "comma", "separated", "words" }); "some", "comma", "separated", "words" });
@ -103,7 +102,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
document.append(largeWord2); document.append(largeWord2);
// Split on whitespace patterns, do not lowercase, no stopwords // Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null); false, null);
check(a, document.toString(), new String[] { new String(largeWord), check(a, document.toString(), new String[] { new String(largeWord),
new String(largeWord2) }); new String(largeWord2) });


@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -31,7 +30,7 @@ public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase
PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter( PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)), new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world")), new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")),
new SingleTokenTokenStream(createToken("$", 0, 0))); new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts, assertTokenStreamContents(ts,


@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -42,7 +41,7 @@ public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase {
// prefix and suffix using 2x prefix // prefix and suffix using 2x prefix
ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)), ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world"))); new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")));
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0))); ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts, assertTokenStreamContents(ts,


@ -10,7 +10,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -38,7 +37,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
dictionary.put("booked", "books"); dictionary.put("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked")); Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
TokenStream stream = new PorterStemFilter( TokenStream stream = new PorterStemFilter(
new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary)); new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
assertTokenStreamContents(stream, new String[] { "books" }); assertTokenStreamContents(stream, new String[] { "books" });
} }
} }


@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader; import java.io.StringReader;
@ -31,9 +30,9 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input; private TokenStream input;
@Override @Override
public void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde")); input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
} }
public void testInvalidInput() throws Exception { public void testInvalidInput() throws Exception {
@ -92,13 +91,13 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
} }
public void testSmallTokenInStream() throws Exception { public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh")); input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3); EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}); assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
} }
public void testReset() throws Exception { public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde")); WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3); EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}); assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
tokenizer.reset(new StringReader("abcde")); tokenizer.reset(new StringReader("abcde"));


@ -29,7 +29,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input; private StringReader input;
@Override @Override
public void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
input = new StringReader("abcde"); input = new StringReader("abcde");
} }


@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader; import java.io.StringReader;
@ -31,9 +30,9 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input; private TokenStream input;
@Override @Override
public void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde")); input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
} }
public void testInvalidInput() throws Exception { public void testInvalidInput() throws Exception {
@ -81,13 +80,13 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
} }
public void testSmallTokenInStream() throws Exception { public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh")); input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3); NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}); assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
} }
public void testReset() throws Exception { public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde")); WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1); NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}); assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5});
tokenizer.reset(new StringReader("abcde")); tokenizer.reset(new StringReader("abcde"));


@ -29,7 +29,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input; private StringReader input;
@Override @Override
public void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
input = new StringReader("abcde"); input = new StringReader("abcde");
} }


@ -127,14 +127,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
} }
public void testSnowballCorrectness() throws Exception { public void testSnowballCorrectness() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "opheffen", "opheff"); checkOneTermReuse(a, "opheffen", "opheff");
checkOneTermReuse(a, "opheffende", "opheff"); checkOneTermReuse(a, "opheffende", "opheff");
checkOneTermReuse(a, "opheffing", "opheff"); checkOneTermReuse(a, "opheffing", "opheff");
} }
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek"); checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
checkOneTermReuse(a, "lichamelijk", "licham"); checkOneTermReuse(a, "lichamelijk", "licham");
checkOneTermReuse(a, "lichamelijke", "licham"); checkOneTermReuse(a, "lichamelijke", "licham");
@ -146,7 +146,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams. * when using reusable token streams.
*/ */
public void testExclusionTableReuse() throws Exception { public void testExclusionTableReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham"); checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemExclusionTable(new String[] { "lichamelijk" }); a.setStemExclusionTable(new String[] { "lichamelijk" });
checkOneTermReuse(a, "lichamelijk", "lichamelijk"); checkOneTermReuse(a, "lichamelijk", "lichamelijk");
@ -157,10 +157,10 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
public void testExclusionTableViaCtor() throws IOException { public void testExclusionTableViaCtor() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true); CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
set.add("lichamelijk"); set.add("lichamelijk");
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set); DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set); a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" }); assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
} }
@ -170,7 +170,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams. * when using reusable token streams.
*/ */
public void testStemDictionaryReuse() throws Exception { public void testStemDictionaryReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham"); checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemDictionary(customDictFile); a.setStemDictionary(customDictFile);
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent"); checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
@ -196,7 +196,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
} }
private void check(final String input, final String expected) throws Exception { private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected); checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
} }
} }


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase { public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new NorwegianAnalyzer(Version.LUCENE_CURRENT); new NorwegianAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "havnedistriktene", "havnedistrikt"); checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
@ -46,7 +45,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("havnedistriktene"); exclusionSet.add("havnedistriktene");
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet); NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene"); checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");


@ -22,21 +22,15 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload; import org.apache.lucene.index.Payload;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import java.io.StringReader; import java.io.StringReader;
/**
*
*
**/
public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testPayloads() throws Exception { public void testPayloads() throws Exception {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN"; String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class); TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@ -57,7 +51,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN"; String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
assertTermEquals("The", filter, null); assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8")); assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@ -75,7 +69,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testFloatEncoding() throws Exception { public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7"; String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new FloatEncoder()); DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class); TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("The", filter, termAtt, payAtt, null);
@ -93,7 +87,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testIntEncoding() throws Exception { public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83"; String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new IntegerEncoder()); DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class); TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("The", filter, termAtt, payAtt, null);


@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -38,7 +37,7 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException { public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs"; String test = "The quick red fox jumped over the lazy brown dogs";
NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))), 3, "D"); NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
boolean seenDogs = false; boolean seenDogs = false;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class); TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);


@ -21,7 +21,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload; import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -36,7 +35,7 @@ public class TokenOffsetPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException { public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs"; String test = "The quick red fox jumped over the lazy brown dogs";
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))); TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
int count = 0; int count = 0;
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class); PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class); OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);


@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -38,7 +37,7 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException { public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs"; String test = "The quick red fox jumped over the lazy brown dogs";
TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)))); TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
int count = 0; int count = 0;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class); TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
@ -48,7 +47,6 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0])))); assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
String type = new String(payloadAtt.getPayload().getData(), "UTF-8"); String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
assertTrue("type is null and it shouldn't be", type != null);
assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true); assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
count++; count++;
} }


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase { public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new PortugueseAnalyzer(Version.LUCENE_CURRENT); new PortugueseAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "quilométricas", "quilométr"); checkOneTermReuse(a, "quilométricas", "quilométr");
checkOneTermReuse(a, "quilométricos", "quilométr"); checkOneTermReuse(a, "quilométricos", "quilométr");
@ -46,7 +45,7 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("quilométricas"); exclusionSet.add("quilométricas");
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet); PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "quilométricas", "quilométricas"); checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr"); checkOneTermReuse(a, "quilométricos", "quilométr");


@ -37,7 +37,6 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"}; String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
@ -51,7 +50,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
dir = new RAMDirectory(); dir = new RAMDirectory();
appAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
int numDocs = 200; int numDocs = 200;
for (int i = 0; i < numDocs; i++) { for (int i = 0; i < numDocs; i++) {
@ -64,7 +63,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
} }
writer.close(); writer.close();
reader = IndexReader.open(dir, true); reader = IndexReader.open(dir, true);
protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer); protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer);
} }
@Override @Override
@ -75,7 +74,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
//Helper method to query //Helper method to query
private int search(Analyzer a, String queryString) throws IOException, ParseException { private int search(Analyzer a, String queryString) throws IOException, ParseException {
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
Query q = qp.parse(queryString); Query q = qp.parse(queryString);
return new IndexSearcher(reader).search(q, null, 1000).totalHits; return new IndexSearcher(reader).search(q, null, 1000).totalHits;
} }
@ -157,14 +156,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0) if (++invocationCount % 2 == 0)
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader); return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
else else
return new LetterTokenizer(Version.LUCENE_CURRENT, reader); return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
} }
} }
public void testWrappingNonReusableAnalyzer() throws Exception { public void testWrappingNonReusableAnalyzer() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer()); QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
a.addStopWords(reader, 10); a.addStopWords(reader, 10);
int numHits = search(a, "repetitiveField:boring"); int numHits = search(a, "repetitiveField:boring");
assertTrue(numHits == 0); assertTrue(numHits == 0);
@ -173,7 +172,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
} }
public void testTokenStream() throws Exception { public void testTokenStream() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)); QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
a.addStopWords(reader, 10); a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring")); TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
TermAttribute termAtt = ts.getAttribute(TermAttribute.class); TermAttribute termAtt = ts.getAttribute(TermAttribute.class);


@ -27,9 +27,9 @@ import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase { public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception { public void testFilter() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("Do have a nice day")); // 1-4 length string new StringReader("Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream); ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
TermAttribute text = filter.getAttribute(TermAttribute.class); TermAttribute text = filter.getAttribute(TermAttribute.class);
assertTrue(filter.incrementToken()); assertTrue(filter.incrementToken());
assertEquals("oD", text.term()); assertEquals("oD", text.term());
@ -45,9 +45,9 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
} }
public void testFilterWithMark() throws Exception { public void testFilterWithMark() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader( TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Do have a nice day")); // 1-4 length string "Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream, '\u0001'); ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
TermAttribute text = filter TermAttribute text = filter
.getAttribute(TermAttribute.class); .getAttribute(TermAttribute.class);
assertTrue(filter.incrementToken()); assertTrue(filter.incrementToken());
@ -64,14 +64,14 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
} }
public void testReverseString() throws Exception { public void testReverseString() throws Exception {
assertEquals( "A", ReverseStringFilter.reverse( "A" ) ); assertEquals( "A", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "A" ) );
assertEquals( "BA", ReverseStringFilter.reverse( "AB" ) ); assertEquals( "BA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "AB" ) );
assertEquals( "CBA", ReverseStringFilter.reverse( "ABC" ) ); assertEquals( "CBA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "ABC" ) );
} }
public void testReverseChar() throws Exception { public void testReverseChar() throws Exception {
char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' }; char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' };
ReverseStringFilter.reverse( buffer, 2, 3 ); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 2, 3 );
assertEquals( "ABEDCF", new String( buffer ) ); assertEquals( "ABEDCF", new String( buffer ) );
} }
@ -84,37 +84,37 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testReverseSupplementary() throws Exception { public void testReverseSupplementary() throws Exception {
// supplementary at end // supplementary at end
assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅")); assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅"));
// supplementary at end - 1 // supplementary at end - 1
assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅a")); assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅a"));
// supplementary at start // supplementary at start
assertEquals("fedcba𩬅", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "𩬅abcdef")); assertEquals("fedcba𩬅", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "𩬅abcdef"));
// supplementary at start + 1 // supplementary at start + 1
assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "z𩬅abcdef")); assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "z𩬅abcdef"));
// supplementary medial // supplementary medial
assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "abcd𩬅efg")); assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "abcd𩬅efg"));
} }
public void testReverseSupplementaryChar() throws Exception { public void testReverseSupplementaryChar() throws Exception {
// supplementary at end // supplementary at end
char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray(); char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer)); assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer));
// supplementary at end - 1 // supplementary at end - 1
buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray(); buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer)); assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer));
// supplementary at start // supplementary at start
buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray(); buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer)); assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer));
// supplementary at start + 1 // supplementary at start + 1
buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray(); buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer)); assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer));
// supplementary medial // supplementary medial
buffer = "abc瀛愯𩬅def".toCharArray(); buffer = "abc瀛愯𩬅def".toCharArray();
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7); ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
assertEquals("abcfed𩬅愯瀛", new String(buffer)); assertEquals("abcfed𩬅愯瀛", new String(buffer));
} }
} }


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase { public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new RomanianAnalyzer(Version.LUCENE_CURRENT); new RomanianAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "absenţa", "absenţ"); checkOneTermReuse(a, "absenţa", "absenţ");
checkOneTermReuse(a, "absenţi", "absenţ"); checkOneTermReuse(a, "absenţi", "absenţ");
@ -46,7 +45,7 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("absenţa"); exclusionSet.add("absenţa");
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
RomanianAnalyzer.getDefaultStopSet(), exclusionSet); RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "absenţa", "absenţa"); checkOneTermReuse(a, "absenţa", "absenţa");
checkOneTermReuse(a, "absenţi", "absenţ"); checkOneTermReuse(a, "absenţi", "absenţ");


@ -44,8 +44,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
private File dataDir; private File dataDir;
@Override @Override
protected void setUp() throws Exception protected void setUp() throws Exception {
{
super.setUp(); super.setUp();
dataDir = new File(System.getProperty("dataDir", "./bin")); dataDir = new File(System.getProperty("dataDir", "./bin"));
} }
@ -71,7 +70,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
TokenStream in = ra.tokenStream("all", inWords); TokenStream in = ra.tokenStream("all", inWords);
RussianLetterTokenizer sample = RussianLetterTokenizer sample =
new RussianLetterTokenizer(Version.LUCENE_CURRENT, new RussianLetterTokenizer(TEST_VERSION_CURRENT,
sampleUnicode); sampleUnicode);
TermAttribute text = in.getAttribute(TermAttribute.class); TermAttribute text = in.getAttribute(TermAttribute.class);
@ -98,7 +97,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testDigitsInRussianCharset() public void testDigitsInRussianCharset()
{ {
Reader reader = new StringReader("text 1000"); Reader reader = new StringReader("text 1000");
RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT); RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
TokenStream stream = ra.tokenStream("", reader); TokenStream stream = ra.tokenStream("", reader);
TermAttribute termText = stream.getAttribute(TermAttribute.class); TermAttribute termText = stream.getAttribute(TermAttribute.class);
@ -126,7 +125,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
} }
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" }); new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне", assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
@ -135,9 +134,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testWithStemExclusionSet() throws Exception { public void testWithStemExclusionSet() throws Exception {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("представление"); set.add("представление");
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT, RussianAnalyzer.getDefaultStopSet() , set); Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще", assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });


@ -30,8 +30,8 @@ import java.util.ArrayList;
@Deprecated @Deprecated
public class TestRussianStem extends LuceneTestCase public class TestRussianStem extends LuceneTestCase
{ {
private ArrayList words = new ArrayList(); private ArrayList<String> words = new ArrayList<String>();
private ArrayList stems = new ArrayList(); private ArrayList<String> stems = new ArrayList<String>();
public TestRussianStem(String name) public TestRussianStem(String name)
{ {
@ -42,8 +42,7 @@ public class TestRussianStem extends LuceneTestCase
* @see TestCase#setUp() * @see TestCase#setUp()
*/ */
@Override @Override
protected void setUp() throws Exception protected void setUp() throws Exception {
{
super.setUp(); super.setUp();
//System.out.println(new java.util.Date()); //System.out.println(new java.util.Date());
String str; String str;
@ -75,15 +74,6 @@ public class TestRussianStem extends LuceneTestCase
inStems.close(); inStems.close();
} }
/**
* @see TestCase#tearDown()
*/
@Override
protected void tearDown() throws Exception
{
super.tearDown();
}
public void testStem() public void testStem()
{ {
for (int i = 0; i < words.size(); i++) for (int i = 0; i < words.size(); i++)
@ -91,7 +81,7 @@ public class TestRussianStem extends LuceneTestCase
//if ( (i % 100) == 0 ) System.err.println(i); //if ( (i % 100) == 0 ) System.err.println(i);
String realStem = String realStem =
RussianStemmer.stemWord( RussianStemmer.stemWord(
(String) words.get(i)); words.get(i));
assertEquals("unicode", stems.get(i), realStem); assertEquals("unicode", stems.get(i), realStem);
} }
} }


@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/** /**
* A test class for ShingleAnalyzerWrapper as regards queries and scoring. * A test class for ShingleAnalyzerWrapper as regards queries and scoring.
@ -86,7 +85,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception { protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
searcher = setUpSearcher(analyzer); searcher = setUpSearcher(analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
Query q = qp.parse(qs); Query q = qp.parse(qs);
@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/ */
public void testShingleAnalyzerWrapperQueryParsing() throws Exception { public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2), (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"test sentence"); "test sentence");
int[] ranks = new int[] { 1, 2, 0 }; int[] ranks = new int[] { 1, 2, 0 };
compareRanks(hits, ranks); compareRanks(hits, ranks);
@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/ */
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception { public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2), (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"\"this sentence\""); "\"this sentence\"");
int[] ranks = new int[] { 0 }; int[] ranks = new int[] { 0 };
compareRanks(hits, ranks); compareRanks(hits, ranks);
@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/ */
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception { public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2), (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"\"test sentence\""); "\"test sentence\"");
int[] ranks = new int[] { 1 }; int[] ranks = new int[] { 1 };
compareRanks(hits, ranks); compareRanks(hits, ranks);
@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
*/ */
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception { public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2), (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
"+test +sentence"); "+test +sentence");
int[] ranks = new int[] { 1, 2 }; int[] ranks = new int[] { 1, 2 };
compareRanks(hits, ranks); compareRanks(hits, ranks);
@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
* This shows how to construct a phrase query containing shingles. * This shows how to construct a phrase query containing shingles.
*/ */
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception { public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2); Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
searcher = setUpSearcher(analyzer); searcher = setUpSearcher(analyzer);
PhraseQuery q = new PhraseQuery(); PhraseQuery q = new PhraseQuery();
@ -178,7 +177,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
* in the right order and adjacent to each other. * in the right order and adjacent to each other.
*/ */
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception { public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2); Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
searcher = setUpSearcher(analyzer); searcher = setUpSearcher(analyzer);
BooleanQuery q = new BooleanQuery(); BooleanQuery q = new BooleanQuery();
@ -200,7 +199,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
} }
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2); Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
assertAnalyzesToReuse(a, "please divide into shingles", assertAnalyzesToReuse(a, "please divide into shingles",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" }, new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 0, 0, 7, 7, 14, 14, 19 },
@ -222,9 +221,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0) if (++invocationCount % 2 == 0)
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader); return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
else else
return new LetterTokenizer(Version.LUCENE_CURRENT, reader); return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
} }
} }
@ -249,7 +248,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNonDefaultMinShingleSize() throws Exception { public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 4); = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", "please divide this sentence", new String[] { "please", "please divide this", "please divide this sentence",
"divide", "divide this sentence", "divide this sentence into", "divide", "divide this sentence", "divide this sentence into",
@ -273,7 +272,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception { public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 3); = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles", assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", new String[] { "please", "please divide this",
"divide", "divide this sentence", "divide", "divide this sentence",
@ -297,7 +296,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNoTokenSeparator() throws Exception { public void testNoTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer()); = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator(""); analyzer.setTokenSeparator("");
assertAnalyzesToReuse(analyzer, "please divide into shingles", assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide", new String[] { "please", "pleasedivide",
@ -319,7 +318,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
public void testNullTokenSeparator() throws Exception { public void testNullTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer()); = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator(null); analyzer.setTokenSeparator(null);
assertAnalyzesToReuse(analyzer, "please divide into shingles", assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide", new String[] { "please", "pleasedivide",
@ -340,7 +339,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
} }
public void testAltTokenSeparator() throws Exception { public void testAltTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer()); = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
analyzer.setTokenSeparator("<SEP>"); analyzer.setTokenSeparator("<SEP>");
assertAnalyzesToReuse(analyzer, "please divide into shingles", assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "please<SEP>divide", new String[] { "please", "please<SEP>divide",


@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Version;
public class ShingleFilterTest extends BaseTokenStreamTestCase { public class ShingleFilterTest extends BaseTokenStreamTestCase {
@ -836,7 +835,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
public void testReset() throws Exception { public void testReset() throws Exception {
Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("please divide this sentence")); Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
TokenStream filter = new ShingleFilter(wsTokenizer, 2); TokenStream filter = new ShingleFilter(wsTokenizer, 2);
assertTokenStreamContents(filter, assertTokenStreamContents(filter,
new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"}, new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},


@ -31,7 +31,6 @@ import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Version;
public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
@ -41,11 +40,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
public void testIterator() throws IOException { public void testIterator() throws IOException {
WhitespaceTokenizer wst = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("one two three four five")); WhitespaceTokenizer wst = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("one two three four five"));
ShingleMatrixFilter smf = new ShingleMatrixFilter(wst, 2, 2, '_', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec()); ShingleMatrixFilter smf = new ShingleMatrixFilter(wst, 2, 2, '_', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
int i; int i;
for(i=0; smf.incrementToken(); i++); for(i=0; smf.incrementToken(); i++) {}
assertEquals(4, i); assertEquals(4, i);
// call next once more. this should return false again rather than throwing an exception (LUCENE-1939) // call next once more. this should return false again rather than throwing an exception (LUCENE-1939)
@ -65,11 +64,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
assertFalse(ts.incrementToken()); assertFalse(ts.incrementToken());
TokenListStream tls; TokenListStream tls;
LinkedList tokens; LinkedList<Token> tokens;
// test a plain old token stream with synonyms translated to rows. // test a plain old token stream with synonyms translated to rows.
tokens = new LinkedList(); tokens = new LinkedList<Token>();
tokens.add(createToken("please", 0, 6)); tokens.add(createToken("please", 0, 6));
tokens.add(createToken("divide", 7, 13)); tokens.add(createToken("divide", 7, 13));
tokens.add(createToken("this", 14, 18)); tokens.add(createToken("this", 14, 18));
@ -101,11 +100,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
TokenStream ts; TokenStream ts;
TokenStream tls; TokenStream tls;
LinkedList tokens; LinkedList<Token> tokens;
// test a plain old token stream with synonyms tranlated to rows. // test a plain old token stream with synonyms tranlated to rows.
tokens = new LinkedList(); tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 0, 4)); tokens.add(tokenFactory("hello", 1, 0, 4));
tokens.add(tokenFactory("greetings", 0, 0, 4)); tokens.add(tokenFactory("greetings", 0, 0, 4));
tokens.add(tokenFactory("world", 1, 5, 10)); tokens.add(tokenFactory("world", 1, 5, 10));
@ -145,7 +144,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec(); ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
tokens = new LinkedList(); tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn)); tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow)); tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn)); tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
@ -286,7 +285,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
// //
tokens = new LinkedList(); tokens = new LinkedList<Token>();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn)); tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow)); tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow)); tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
@ -413,11 +412,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
} }
private Token tokenFactory(String text, int startOffset, int endOffset) {
return tokenFactory(text, 1, 1f, startOffset, endOffset);
}
private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) { private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset); Token token = new Token(startOffset, endOffset);
token.setTermBuffer(text); token.setTermBuffer(text);
@ -430,10 +424,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
return tokenFactory(text, posIncr, 1f, 0, 0); return tokenFactory(text, posIncr, 1f, 0, 0);
} }
private Token tokenFactory(String text, int posIncr, float weight) {
return tokenFactory(text, posIncr, weight, 0, 0);
}
private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) { private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset); Token token = new Token(startOffset, endOffset);
token.setTermBuffer(text); token.setTermBuffer(text);
@ -460,17 +450,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
assertEquals(text, termAtt.term()); assertEquals(text, termAtt.term());
} }
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost) throws IOException {
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
assertTrue(ts.incrementToken());
assertEquals(text, termAtt.term());
assertEquals(positionIncrement, posIncrAtt.getPositionIncrement());
assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0);
}
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException {
TermAttribute termAtt = ts.addAttribute(TermAttribute.class); TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@ -505,7 +484,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
public static class TokenListStream extends TokenStream { public static class TokenListStream extends TokenStream {
private Collection tokens; private Collection<Token> tokens;
TermAttribute termAtt; TermAttribute termAtt;
PositionIncrementAttribute posIncrAtt; PositionIncrementAttribute posIncrAtt;
PayloadAttribute payloadAtt; PayloadAttribute payloadAtt;
@ -513,7 +492,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
TypeAttribute typeAtt; TypeAttribute typeAtt;
FlagsAttribute flagsAtt; FlagsAttribute flagsAtt;
public TokenListStream(Collection tokens) { public TokenListStream(Collection<Token> tokens) {
this.tokens = tokens; this.tokens = tokens;
termAtt = addAttribute(TermAttribute.class); termAtt = addAttribute(TermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@ -523,7 +502,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
flagsAtt = addAttribute(FlagsAttribute.class); flagsAtt = addAttribute(FlagsAttribute.class);
} }
private Iterator iterator; private Iterator<Token> iterator;
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
@ -533,7 +512,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
if (!iterator.hasNext()) { if (!iterator.hasNext()) {
return false; return false;
} }
Token prototype = (Token) iterator.next(); Token prototype = iterator.next();
clearAttributes(); clearAttributes();
termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength()); termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
posIncrAtt.setPositionIncrement(prototype.getPositionIncrement()); posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());


@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.util.Version;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase { public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
@ -37,7 +36,7 @@ public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException { public void test() throws IOException {
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US)); DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006"; String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))); TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter); SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
int count = 0; int count = 0;


@ -23,7 +23,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter; import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.util.Version;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase { public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
@ -35,7 +34,7 @@ public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException { public void test() throws IOException {
TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4); TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
String test = "The quick red fox jumped over the lazy brown dogs"; String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))); TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter); SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
int count = 0; int count = 0;


@ -27,11 +27,9 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream; import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase { public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
public TokenTypeSinkTokenizerTest(String s) { public TokenTypeSinkTokenizerTest(String s) {
super(s); super(s);
} }
@ -40,7 +38,7 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D"); TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
String test = "The quick red fox jumped over the lazy brown dogs"; String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)))); TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter); SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
boolean seenDogs = false; boolean seenDogs = false;


@ -33,13 +33,13 @@ import org.apache.lucene.util.Version;
public class TestSnowball extends BaseTokenStreamTestCase { public class TestSnowball extends BaseTokenStreamTestCase {
public void testEnglish() throws Exception { public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English"); Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesTo(a, "he abhorred accents", assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"}); new String[]{"he", "abhor", "accent"});
} }
public void testStopwords() throws Exception { public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English", Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English",
StandardAnalyzer.STOP_WORDS_SET); StandardAnalyzer.STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped", assertAnalyzesTo(a, "the quick brown fox jumped",
new String[]{"quick", "brown", "fox", "jump"}); new String[]{"quick", "brown", "fox", "jump"});
@ -50,7 +50,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
* we lowercase I correct for non-Turkish languages in either case. * we lowercase I correct for non-Turkish languages in either case.
*/ */
public void testEnglishLowerCase() throws Exception { public void testEnglishLowerCase() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English"); Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" }); assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" });
assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" }); assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" });
@ -63,7 +63,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
* Test turkish lowercasing * Test turkish lowercasing
*/ */
public void testTurkish() throws Exception { public void testTurkish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "Turkish"); Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" }); assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" }); assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
@ -84,7 +84,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English"); Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
assertAnalyzesToReuse(a, "he abhorred accents", assertAnalyzesToReuse(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"}); new String[]{"he", "abhor", "accent"});
assertAnalyzesToReuse(a, "she abhorred him", assertAnalyzesToReuse(a, "she abhorred him",


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new SwedishAnalyzer(Version.LUCENE_CURRENT); new SwedishAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "jaktkarlarne", "jaktkarl"); checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
@ -46,7 +45,7 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("jaktkarlarne"); exclusionSet.add("jaktkarlarne");
Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
SwedishAnalyzer.getDefaultStopSet(), exclusionSet); SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne"); checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); checkOneTermReuse(a, "jaktkarlens", "jaktkarl");


@ -18,7 +18,6 @@ package org.apache.lucene.analysis.th;
*/ */
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/** /**
* Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@ -32,7 +31,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* testcase for offsets * testcase for offsets
*/ */
public void testOffsets() throws Exception { public void testOffsets() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์", assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"}, new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
new int[] { 0, 2, 7, 9, 12 }, new int[] { 0, 2, 7, 9, 12 },
new int[] { 2, 7, 9, 12, 17}); new int[] { 2, 7, 9, 12, 17});
@ -50,7 +49,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks! * Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
*/ */
public void testBuggyTokenType() throws Exception { public void testBuggyTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓", assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" }, new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" }); new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
} }
@ -64,7 +63,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
*/ */
public void testAnalyzer() throws Exception { public void testAnalyzer() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT); ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "", new String[] {}); assertAnalyzesTo(analyzer, "", new String[] {});
@ -89,7 +88,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Test that position increments are adjusted correctly for stopwords. * Test that position increments are adjusted correctly for stopwords.
*/ */
public void testPositionIncrements() throws Exception { public void testPositionIncrements() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT); ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า", assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" }, new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
@ -106,7 +105,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
} }
public void testReusableTokenStream() throws Exception { public void testReusableTokenStream() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT); ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(analyzer, "", new String[] {}); assertAnalyzesToReuse(analyzer, "", new String[] {});
assertAnalyzesToReuse( assertAnalyzesToReuse(


@ -23,18 +23,17 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase { public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the /** This test fails with NPE when the
* stopwords file is missing in classpath */ * stopwords file is missing in classpath */
public void testResourcesAvailable() { public void testResourcesAvailable() {
new TurkishAnalyzer(Version.LUCENE_CURRENT); new TurkishAnalyzer(TEST_VERSION_CURRENT);
} }
/** test stopwords and stemming */ /** test stopwords and stemming */
public void testBasics() throws IOException { public void testBasics() throws IOException {
Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT); Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT);
// stemming // stemming
checkOneTermReuse(a, "ağacı", "ağaç"); checkOneTermReuse(a, "ağacı", "ağaç");
checkOneTermReuse(a, "ağaç", "ağaç"); checkOneTermReuse(a, "ağaç", "ağaç");
@ -46,7 +45,7 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException { public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>(); Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("ağacı"); exclusionSet.add("ağacı");
Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT, Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
TurkishAnalyzer.getDefaultStopSet(), exclusionSet); TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "ağacı", "ağacı"); checkOneTermReuse(a, "ağacı", "ağacı");
checkOneTermReuse(a, "ağaç", "ağaç"); checkOneTermReuse(a, "ağaç", "ağaç");


@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/** /**
* Test the Turkish lowercase filter. * Test the Turkish lowercase filter.
@ -33,7 +32,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* Test composed forms * Test composed forms
*/ */
public void testTurkishLowerCaseFilter() throws Exception { public void testTurkishLowerCaseFilter() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader( TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0130STANBUL \u0130ZM\u0130R ISPARTA")); "\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream); TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir", assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@ -44,7 +43,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* Test decomposed forms * Test decomposed forms
*/ */
public void testDecomposed() throws Exception { public void testDecomposed() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader( TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA")); "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream); TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir", assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@ -57,7 +56,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
* to U+0130 + U+0316, and is lowercased the same way. * to U+0130 + U+0316, and is lowercased the same way.
*/ */
public void testDecomposed2() throws Exception { public void testDecomposed2() throws Exception {
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader( TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA")); "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream); TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir", assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",


@ -20,8 +20,6 @@ package org.apache.lucene.ant;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser;
@ -31,13 +29,13 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.tools.ant.Project; import org.apache.tools.ant.Project;
import org.apache.tools.ant.types.FileSet; import org.apache.tools.ant.types.FileSet;
import org.apache.lucene.util.Version; import org.apache.lucene.util.LuceneTestCase;
/** /**
* Test cases for index task * Test cases for index task
* *
*/ */
public class IndexTaskTest extends TestCase { public class IndexTaskTest extends LuceneTestCase {
private final static String docHandler = private final static String docHandler =
"org.apache.lucene.ant.FileExtensionDocumentHandler"; "org.apache.lucene.ant.FileExtensionDocumentHandler";
@ -55,7 +53,8 @@ public class IndexTaskTest extends TestCase {
*@exception IOException Description of Exception *@exception IOException Description of Exception
*/ */
@Override @Override
public void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp();
Project project = new Project(); Project project = new Project();
IndexTask task = new IndexTask(); IndexTask task = new IndexTask();
@ -71,12 +70,12 @@ public class IndexTaskTest extends TestCase {
dir = FSDirectory.open(indexDir); dir = FSDirectory.open(indexDir);
searcher = new IndexSearcher(dir, true); searcher = new IndexSearcher(dir, true);
analyzer = new StopAnalyzer(Version.LUCENE_CURRENT); analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
} }
public void testSearch() throws Exception { public void testSearch() throws Exception {
Query query = new QueryParser(Version.LUCENE_CURRENT, "contents",analyzer).parse("test"); Query query = new QueryParser(TEST_VERSION_CURRENT, "contents",analyzer).parse("test");
int numHits = searcher.search(query, null, 1000).totalHits; int numHits = searcher.search(query, null, 1000).totalHits;
@ -88,9 +87,10 @@ public class IndexTaskTest extends TestCase {
* TODO: remove indexDir? * TODO: remove indexDir?
*/ */
@Override @Override
public void tearDown() throws IOException { protected void tearDown() throws Exception {
searcher.close(); searcher.close();
dir.close(); dir.close();
super.tearDown();
} }
} }

Some files were not shown because too many files have changed in this diff.