mirror of https://github.com/apache/lucene.git
LUCENE-2285: Code cleanups to remove compiler warnings in eclipse.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@917019 13f79535-47bb-0310-9956-ffa450edef68
parent e358c3f2dd
commit efb74380fd

CHANGES.txt | 13
@@ -186,7 +186,9 @@ Optimizations

* LUCENE-2195: Speedup CharArraySet if set is empty.
  (Simon Willnauer via Robert Muir)

* LUCENE-2285: Code cleanup. (Shai Erera via Uwe Schindler)

Build

* LUCENE-2124: Moved the JDK-based collation support from contrib/collation

@@ -209,10 +211,11 @@ Test Cases

* LUCENE-2170: Fix thread starvation problems. (Uwe Schindler)

* LUCENE-2248, LUCENE-2251: Refactor tests to not use Version.LUCENE_CURRENT,
  but instead use a global static value from LuceneTestCase(J4), that
  contains the release version. (Uwe Schindler, Simon Willnauer)

* LUCENE-2248, LUCENE-2251, LUCENE-2285: Refactor tests to not use
  Version.LUCENE_CURRENT, but instead use a global static value
  from LuceneTestCase(J4), that contains the release version.
  (Uwe Schindler, Simon Willnauer, Shai Erera)

================== Release 2.9.2 / 3.0.1 2010-02-26 ====================

Changes in backwards compatibility policy
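The test-refactoring entries above replace per-test references to Version.LUCENE_CURRENT with one shared constant on the test base class; the constant name TEST_VERSION_CURRENT appears in the test diffs later in this commit. A minimal sketch of the pattern (the exact declaration and chosen version here are assumptions, not the real LuceneTestCase code):

```java
import org.apache.lucene.util.Version;

// Sketch only: a test base class exposing one static release version,
// so individual tests stop hard-coding Version.LUCENE_CURRENT.
public abstract class LuceneTestCaseSketch {
  // Assumed declaration; the real field lives in LuceneTestCase(J4).
  public static final Version TEST_VERSION_CURRENT = Version.LUCENE_31;
}
```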
@@ -25,7 +25,6 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;

@@ -162,14 +161,16 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
this(matchVersion, WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT));
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from an {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided
* and {@link ArabicStemFilter}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerTokenFilter}
* if a stem exclusion set is provided and {@link ArabicStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -24,7 +24,6 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;

@@ -117,15 +116,18 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet)); }

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link BulgarianStemFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link BulgarianStemFilter}.
*/
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;

@@ -191,12 +190,16 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
excltable = WordlistLoader.getWordSet( exclusionlist );
setPreviousTokenStream(null); // force a new stemmer to be created
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
* {@link BrazilianStemFilter}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
* , and {@link BrazilianStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.cn;
import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;

@@ -35,11 +34,13 @@ import org.apache.lucene.analysis.Tokenizer;
public final class ChineseAnalyzer extends ReusableAnalyzerBase {

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link ChineseTokenizer} filtered with {@link ChineseFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link ChineseTokenizer} filtered with
* {@link ChineseFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -113,7 +113,7 @@ public class HyphenationCompoundWordTokenFilter extends
* strings.
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set dictionary) {
HyphenationTree hyphenator, Set<?> dictionary) {
this(input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}

@@ -145,7 +145,7 @@ public class HyphenationCompoundWordTokenFilter extends
* Add only the longest matching subword to the stream
*/
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
HyphenationTree hyphenator, Set dictionary, int minWordSize,
HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);

@@ -201,7 +201,7 @@ public class HyphenationCompoundWordTokenFilter extends
*/
@Deprecated
public HyphenationCompoundWordTokenFilter(TokenStream input,
HyphenationTree hyphenator, Set dictionary) {
HyphenationTree hyphenator, Set<?> dictionary) {
this(Version.LUCENE_30, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}

@@ -223,7 +223,7 @@ public class HyphenationCompoundWordTokenFilter extends
*/
@Deprecated
public HyphenationCompoundWordTokenFilter(TokenStream input,
HyphenationTree hyphenator, Set dictionary, int minWordSize,
HyphenationTree hyphenator, Set<?> dictionary, int minWordSize,
int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(Version.LUCENE_30, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch);
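The four constructor hunks above swap the raw Set parameter for the wildcard Set<?>, which is what removes Eclipse's raw-type warnings without committing the signature to a particular element type. A small illustration of the difference, using a hypothetical caller that is not part of the patch:

```java
import java.util.HashSet;
import java.util.Set;

class RawTypeDemo {
  // Raw type: this declaration itself is flagged as a raw-type warning.
  static int sizeRaw(Set dictionary) { return dictionary.size(); }

  // Wildcard: accepts Set<String>, Set<Object>, etc. without warnings,
  // and the method still cannot insert elements of an assumed type.
  static int sizeWildcard(Set<?> dictionary) { return dictionary.size(); }

  public static void main(String[] args) {
    Set<String> words = new HashSet<String>();
    words.add("hyphenation");
    System.out.println(sizeRaw(words));      // 1
    System.out.println(sizeWildcard(words)); // 1
  }
}
```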
@@ -83,7 +83,7 @@ public class CharVector implements Cloneable, Serializable {

@Override
public Object clone() {
CharVector cv = new CharVector((char[]) array.clone(), blockSize);
CharVector cv = new CharVector(array.clone(), blockSize);
cv.n = this.n;
return cv;
}
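This CharVector.clone() hunk, like the TernaryTree and n-gram filter hunks later in the commit, drops the cast in front of array.clone(). Since Java 5, array cloning is covariant: calling clone() on a char[] is statically typed as char[], so the (char[]) cast is redundant and Eclipse warns about it. A tiny demonstration:

```java
public class ArrayCloneDemo {
  public static void main(String[] args) {
    char[] original = {'l', 'u', 'c', 'e', 'n', 'e'};
    // No cast needed: the clone of a char[] is already a char[].
    char[] copy = original.clone();
    copy[0] = 'L';
    System.out.println(new String(original)); // lucene
    System.out.println(new String(copy));     // Lucene
  }
}
```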
@@ -26,11 +26,6 @@ public class Hyphenation {

private int[] hyphenPoints;

/**
* number of hyphenation points in word
*/
private int len;

/**
* rawWord as made of alternating strings and {@link Hyphen Hyphen} instances
*/
@@ -44,7 +44,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
/**
* This map stores hyphenation exceptions
*/
protected HashMap<String,ArrayList> stoplist;
protected HashMap<String,ArrayList<Object>> stoplist;

/**
* This map stores the character classes

@@ -57,7 +57,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
private transient TernaryTree ivalues;

public HyphenationTree() {
stoplist = new HashMap<String,ArrayList>(23); // usually a small table
stoplist = new HashMap<String,ArrayList<Object>>(23); // usually a small table
classmap = new TernaryTree();
vspace = new ByteVector();
vspace.alloc(1); // this reserves index 0, which we don't use

@@ -363,7 +363,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
if (stoplist.containsKey(sw)) {
// assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
// null)
ArrayList hw = stoplist.get(sw);
ArrayList<Object> hw = stoplist.get(sw);
int j = 0;
for (i = 0; i < hw.size(); i++) {
Object o = hw.get(i);

@@ -443,7 +443,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer,
* @param hyphenatedword a vector of alternating strings and
* {@link Hyphen hyphen} objects.
*/
public void addException(String word, ArrayList hyphenatedword) {
public void addException(String word, ArrayList<Object> hyphenatedword) {
stoplist.put(word, hyphenatedword);
}
@@ -42,7 +42,7 @@ public interface PatternConsumer {
* his own hyphenation. A hyphenatedword is a vector of alternating String's
* and {@link Hyphen Hyphen} instances
*/
void addException(String word, ArrayList hyphenatedword);
void addException(String word, ArrayList<Object> hyphenatedword);

/**
* Add hyphenation patterns.
@@ -51,7 +51,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {

StringBuilder token;

ArrayList exception;
ArrayList<Object> exception;

char hyphenChar;

@@ -199,8 +199,8 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
return pat.toString();
}

protected ArrayList normalizeException(ArrayList ex) {
ArrayList res = new ArrayList();
protected ArrayList<Object> normalizeException(ArrayList<?> ex) {
ArrayList<Object> res = new ArrayList<Object>();
for (int i = 0; i < ex.size(); i++) {
Object item = ex.get(i);
if (item instanceof String) {

@@ -230,7 +230,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
return res;
}

protected String getExceptionWord(ArrayList ex) {
protected String getExceptionWord(ArrayList<?> ex) {
StringBuilder res = new StringBuilder();
for (int i = 0; i < ex.size(); i++) {
Object item = ex.get(i);

@@ -291,7 +291,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
currElement = ELEM_PATTERNS;
} else if (local.equals("exceptions")) {
currElement = ELEM_EXCEPTIONS;
exception = new ArrayList();
exception = new ArrayList<Object>();
} else if (local.equals("hyphen")) {
if (token.length() > 0) {
exception.add(token.toString());

@@ -308,6 +308,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
* java.lang.String, java.lang.String)
*/
@Override
@SuppressWarnings("unchecked")
public void endElement(String uri, String local, String raw) {

if (token.length() > 0) {

@@ -319,7 +320,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
case ELEM_EXCEPTIONS:
exception.add(word);
exception = normalizeException(exception);
consumer.addException(getExceptionWord(exception),
consumer.addException(getExceptionWord(exception),
(ArrayList) exception.clone());
break;
case ELEM_PATTERNS:

@@ -344,6 +345,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
/**
* @see org.xml.sax.ContentHandler#characters(char[], int, int)
*/
@SuppressWarnings("unchecked")
@Override
public void characters(char ch[], int start, int length) {
StringBuffer chars = new StringBuffer(length);

@@ -428,7 +430,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
System.out.println("class: " + c);
}

public void addException(String w, ArrayList e) {
public void addException(String w, ArrayList<Object> e) {
System.out.println("exception: " + w + " : " + e.toString());
}
@@ -351,10 +351,10 @@ public class TernaryTree implements Cloneable, Serializable {
@Override
public Object clone() {
TernaryTree t = new TernaryTree();
t.lo = (char[]) this.lo.clone();
t.hi = (char[]) this.hi.clone();
t.eq = (char[]) this.eq.clone();
t.sc = (char[]) this.sc.clone();
t.lo = this.lo.clone();
t.hi = this.hi.clone();
t.eq = this.eq.clone();
t.sc = this.sc.clone();
t.kv = (CharVector) this.kv.clone();
t.root = this.root;
t.freenode = this.freenode;
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cz;
*/

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;

@@ -216,16 +215,20 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
stoptable = Collections.emptySet();
}
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, and {@link CzechStemFilter} (only if version is
* >= LUCENE_31). If a version is >= LUCENE_31 and a stem exclusion set
* is provided via {@link #CzechAnalyzer(Version, Set, Set)} a
* {@link KeywordMarkerTokenFilter} is added before {@link CzechStemFilter}.
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
* a version is >= LUCENE_31 and a stem exclusion set is provided via
* {@link #CzechAnalyzer(Version, Set, Set)} a
* {@link KeywordMarkerTokenFilter} is added before
* {@link CzechStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -29,7 +29,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;

@@ -222,16 +221,17 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
exclusionSet = WordlistLoader.getWordSet(exclusionlist);
setPreviousTokenStream(null); // force a new stemmer to be created
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
* {@link SnowballFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.el;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;

@@ -120,15 +119,17 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
{
this(matchVersion, stopwords.keySet());
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with
* {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
*/

/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link GreekLowerCaseFilter}, {@link StandardFilter} and
* {@link StopFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -90,13 +89,16 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link PorterStemFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link PorterStemFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -25,7 +25,6 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;

@@ -136,12 +135,13 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link ArabicLetterTokenizer}
* filtered with {@link LowerCaseFilter},
* {@link ArabicNormalizationFilter},
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
* {@link PersianNormalizationFilter} and Persian Stop words
*/
@Override
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.fr;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;

@@ -225,14 +224,16 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link ElisionFilter},
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ElisionFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* and {@link SnowballFilter}
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -22,7 +22,6 @@ import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;

@@ -106,15 +105,16 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
* {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link IndicTokenizer}
* filtered with {@link LowerCaseFilter},
* {@link IndicNormalizationFilter},
* {@link HindiNormalizationFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* {@link HindiStemFilter}, and Hindi Stop words
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link IndicTokenizer} filtered with
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
* {@link HindiNormalizationFilter}, {@link KeywordMarkerTokenFilter}
* if a stem exclusion set is provided, {@link HindiStemFilter}, and
* Hindi Stop words
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -311,7 +311,7 @@ public final class PatternAnalyzer extends Analyzer {

return new String(output, 0, len);
} finally {
if (input != null) input.close();
input.close();
}
}
@@ -124,7 +124,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermBuffer = termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
tokStart = offsetAtt.startOffset();
@@ -79,7 +79,7 @@ public final class NGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermBuffer = termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
curPos = 0;
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -192,7 +192,7 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
* if there stopwords, it is a StopFilter around wrapped.
*/
TokenStream withStopFilter;
};
}

@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -110,13 +109,16 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -26,7 +26,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -160,16 +159,17 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
this(matchVersion, stopwords.keySet());
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
* and {@link SnowballFilter}
*/
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
@@ -26,7 +26,7 @@ package org.apache.lucene.analysis.ru;
class RussianStemmer
{
// positions of RV, R1 and R2 respectively
private int RV, R1, R2;
private int RV, /*R1,*/ R2;

// letters (currently unused letters are commented out)
private final static char A = '\u0430';

@@ -263,11 +263,7 @@ class RussianStemmer
if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
return false;
// if adjective ending was found, try for participle ending.
// variable r is unused, we are just interested in the side effect of
// findAndRemoveEnding():
boolean r =
findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
||
if (!findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
findAndRemoveEnding(stemmingZone, participleEndings2);
return true;
}

@@ -391,7 +387,7 @@ class RussianStemmer
private void markPositions(String word)
{
RV = 0;
R1 = 0;
// R1 = 0;
R2 = 0;
int i = 0;
// find RV

@@ -409,7 +405,7 @@ class RussianStemmer
}
if (word.length() - 1 < ++i)
return; // R1 zone is empty
R1 = i;
// R1 = i;
// find R2
while (word.length() > i && !isVowel(word.charAt(i)))
{

@@ -532,13 +528,9 @@ class RussianStemmer
if (!perfectiveGerund(stemmingZone))
{
reflexive(stemmingZone);
// variable r is unused, we are just interested in the flow that gets
// created by logical expression: apply adjectival(); if that fails,
// apply verb() etc
boolean r =
adjectival(stemmingZone)
|| verb(stemmingZone)
|| noun(stemmingZone);
if (!adjectival(stemmingZone))
if (!verb(stemmingZone))
noun(stemmingZone);
}
// Step 2
removeI(stemmingZone);
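The two RussianStemmer hunks above remove a boolean that was assigned only so that the || chain would run its side effects; the replacement expresses the same short-circuit flow as nested if statements, so nothing is assigned and the unused-variable warning disappears. The equivalence, in schematic form with hypothetical method names standing in for the stemmer calls:

```java
class ShortCircuitDemo {
  static boolean first()  { System.out.println("first");  return false; }
  static boolean second() { System.out.println("second"); return true;  }
  static boolean third()  { System.out.println("third");  return true;  }

  public static void main(String[] args) {
    // Before: r is never read; only the side effects of the calls matter.
    boolean r = first() || second() || third(); // prints "first", "second"

    // After: identical evaluation order and short-circuiting, no unused variable.
    if (!first())
      if (!second())
        third();                                // prints "first", "second"
  }
}
```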
@@ -391,8 +391,8 @@ public final class ShingleFilter extends TokenFilter {
}

/**
* {@see #advance()}
* @return the current value.
* @see #advance()
*/
public int getValue() {
return value;
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.sinks;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter;

@@ -42,7 +41,7 @@ public class DateRecognizerSinkFilter extends SinkFilter {
* Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
*/
public DateRecognizerSinkFilter() {
this(SimpleDateFormat.getDateInstance());
this(DateFormat.getDateInstance());
}

public DateRecognizerSinkFilter(DateFormat dateFormat) {
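The DateRecognizerSinkFilter hunk above calls DateFormat.getDateInstance() instead of SimpleDateFormat.getDateInstance(). The factory method is declared as a static member of DateFormat; invoking it through the SimpleDateFormat subtype works but triggers Eclipse's "static method should be accessed in a static way" warning. A minimal before/after sketch:

```java
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

public class StaticAccessDemo {
  public static void main(String[] args) {
    // Flagged: getDateInstance() is static on DateFormat,
    // but referenced here through the SimpleDateFormat subtype.
    DateFormat before = SimpleDateFormat.getDateInstance();

    // Preferred: name the declaring class directly.
    DateFormat after = DateFormat.getDateInstance();

    System.out.println(before.format(new Date()));
    System.out.println(after.format(new Date()));
  }
}
```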
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -106,13 +105,16 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerTokenFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;

@@ -45,12 +44,14 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
}

/**
* Creates {@link TokenStreamComponents} used to tokenize all the text in the
* provided {@link Reader}.
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link ThaiWordFilter}, and {@link StopFilter}
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ThaiWordFilter}, and
* {@link StopFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

@@ -109,11 +108,14 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
}

/**
* Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
* {@link Reader}.
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
* filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
*/
@@ -24,7 +24,6 @@ import java.util.Set;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;

/**
* Test the Arabic Analyzer

@@ -35,14 +34,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ArabicAnalyzer(Version.LUCENE_CURRENT);
new ArabicAnalyzer(TEST_VERSION_CURRENT);
}

/**
* Some simple tests showing some features of the analyzer, how some regular forms will conflate
*/
public void testBasicFeatures() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker

@@ -63,7 +62,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Simple tests to show things are getting reset correctly, etc.
*/
public void testReusableTokenStream() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
}

@@ -72,7 +71,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Non-arabic text gets treated in a similar way as SimpleAnalyzer.
*/
public void testEnglishInput() throws Exception {
assertAnalyzesTo(new ArabicAnalyzer(Version.LUCENE_CURRENT), "English text.", new String[] {
assertAnalyzesTo(new ArabicAnalyzer(TEST_VERSION_CURRENT), "English text.", new String[] {
"english", "text" });
}

@@ -82,7 +81,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
public void testCustomStopwords() throws Exception {
Set<String> set = new HashSet<String>();
Collections.addAll(set, "the", "and", "a");
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, set);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

@@ -90,12 +89,12 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
public void testWithStemExclusionSet() throws IOException {
Set<String> set = new HashSet<String>();
set.add("ساهدهات");
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });

a = new ArabicAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
}
@@ -21,11 +21,9 @@ import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

/**
* Test the Arabic Normalization Filter
*
*/
public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
@@ -86,7 +84,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
}

private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
ArabicNormalizationFilter filter = new ArabicNormalizationFilter(tokenStream);
assertTokenStreamContents(filter, new String[]{expected});
}

@@ -23,7 +23,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.util.Version;

/**
* Test the Arabic Normalization Filter
@@ -116,16 +115,16 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
}

public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("ساهدهات");
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader("ساهدهات"));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));

ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerTokenFilter(tokenStream, set));
assertTokenStreamContents(filter, new String[]{"ساهدهات"});
}

private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT, new StringReader(input));
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
assertTokenStreamContents(filter, new String[]{expected});
}

@@ -34,23 +34,23 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new BulgarianAnalyzer(Version.LUCENE_CURRENT);
new BulgarianAnalyzer(TEST_VERSION_CURRENT);
}

public void testStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"});
}

public void testCustomStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, Collections
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
.emptySet());
assertAnalyzesTo(a, "Как се казваш?",
new String[] {"как", "се", "казваш"});
}

public void testReusableTokenStream() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "документи", new String[] {"документ"});
assertAnalyzesToReuse(a, "документ", new String[] {"документ"});
}
@@ -59,7 +59,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
* Test some examples from the paper
*/
public void testBasicExamples() throws IOException {
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "енергийни кризи", new String[] {"енергийн", "криз"});
assertAnalyzesTo(a, "Атомната енергия", new String[] {"атомн", "енерг"});

@@ -72,7 +72,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
public void testWithStemExclusionSet() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
Analyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
}
}

@@ -35,7 +35,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* common (and some rare) plural pattern is listed.
*/
public void testMasculineNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);

// -и pattern
assertAnalyzesTo(a, "град", new String[] {"град"});
@@ -101,7 +101,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test showing how feminine noun forms conflate
*/
public void testFeminineNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);

assertAnalyzesTo(a, "вест", new String[] {"вест"});
assertAnalyzesTo(a, "вестта", new String[] {"вест"});
@@ -114,7 +114,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* plural pattern is listed
*/
public void testNeuterNouns() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);

// -а pattern
assertAnalyzesTo(a, "дърво", new String[] {"дърв"});
@@ -142,7 +142,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test showing how adjectival forms conflate
*/
public void testAdjectives() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "красив", new String[] {"красив"});
assertAnalyzesTo(a, "красивия", new String[] {"красив"});
assertAnalyzesTo(a, "красивият", new String[] {"красив"});
@@ -158,7 +158,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
* Test some exceptional rules, implemented as rewrites.
*/
public void testExceptions() throws IOException {
BulgarianAnalyzer a = new BulgarianAnalyzer(Version.LUCENE_CURRENT);
BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);

// ци -> к
assertAnalyzesTo(a, "собственик", new String[] {"собственик"});
@@ -215,7 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("строевете строеве"));

BulgarianStemFilter filter = new BulgarianStemFilter(

@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.util.Version;

/**
* Test the Brazilian Stem Filter, which only modifies the term text.
@@ -128,7 +127,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}

public void testReusableTokenStream() throws Exception {
Analyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
checkReuse(a, "boa", "boa");
checkReuse(a, "boainain", "boainain");
checkReuse(a, "boas", "boas");
@@ -136,35 +135,35 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}

public void testStemExclusionTable() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}

public void testStemExclusionTableBWCompat() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
BrazilianStemFilter filter = new BrazilianStemFilter(
new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader("Brasília Brasilia")), set);
new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}

public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
BrazilianStemFilter filter = new BrazilianStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Brasília Brasilia")), set));
assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}

public void testWithKeywordAttributeAndExclusionTable() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("Brasília");
CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set1.add("Brasilia");
BrazilianStemFilter filter = new BrazilianStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Brasília Brasilia")), set), set1);
assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
}
@@ -174,14 +173,14 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
checkReuse(a, "quintessência", "quintessente");
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência");
}

private void check(final String input, final String expected) throws Exception {
checkOneTerm(new BrazilianAnalyzer(Version.LUCENE_CURRENT), input, expected);
checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected);
}

private void checkReuse(Analyzer a, String input, String expected) throws Exception {

@@ -21,7 +21,6 @@ import java.io.IOException;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;

public class TestCJKTokenizer extends BaseTokenStreamTestCase {
@@ -42,7 +41,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}

public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String terms[] = new String[out_tokens.length];
int startOffsets[] = new int[out_tokens.length];
int endOffsets[] = new int[out_tokens.length];
@@ -57,7 +56,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}

public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String terms[] = new String[out_tokens.length];
int startOffsets[] = new int[out_tokens.length];
int endOffsets[] = new int[out_tokens.length];
@@ -213,13 +212,13 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}

public void testTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
}

public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";

TestToken[] out_tokens = {

@@ -28,17 +28,11 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
static final File dataDir = new File(System.getProperty("dataDir", "./bin"));
static final File testFile = new File(dataDir, "org/apache/lucene/analysis/compound/da_UTF8.xml");

@Override
protected void setUp() throws Exception {
super.setUp();
}

public void testHyphenationCompoundWordsDA() throws Exception {
String[] dict = { "læse", "hest" };

@@ -47,8 +41,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
.getHyphenationTree(reader);

HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"min veninde som er lidt af en læsehest")), hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
@@ -67,8 +61,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
.getHyphenationTree(reader);

// the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"basketballkurv")), hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
@@ -84,8 +78,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
"Sko", "Vind", "Rute", "Torkare", "Blad" };

DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT,
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
dict);
@@ -113,8 +107,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };

DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
@@ -129,9 +123,9 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
"Aufgabe", "Überwachung" };

Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Rindfleischüberwachungsgesetz"));
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(Version.LUCENE_CURRENT,
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
wsTokenizer, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,

@@ -48,7 +48,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}

public void testStopWord() throws Exception {
assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem",
assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem",
new String[] { "mluvim", "voln" });
}

@@ -63,7 +63,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}

public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
}
@@ -112,9 +112,9 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
}

public void testWithStemExclusionSet() throws IOException{
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
}

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

/**
* Test the Czech Stemmer.
@@ -38,7 +37,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how masculine noun forms conflate
*/
public void testMasculineNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);

/* animate ending with a hard consonant */
assertAnalyzesTo(cz, "pán", new String[] { "pán" });
@@ -106,7 +105,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how feminine noun forms conflate
*/
public void testFeminineNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);

/* ending with hard consonant */
assertAnalyzesTo(cz, "kost", new String[] { "kost" });
@@ -150,7 +149,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how neuter noun forms conflate
*/
public void testNeuterNouns() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);

/* ending with o */
assertAnalyzesTo(cz, "město", new String[] { "měst" });
@@ -193,7 +192,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test showing how adjectival forms conflate
*/
public void testAdjectives() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);

/* ending with ý/á/é */
assertAnalyzesTo(cz, "mladý", new String[] { "mlad" });
@@ -221,7 +220,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test some possessive suffixes
*/
public void testPossessive() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(cz, "Karlův", new String[] { "karl" });
assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" });
}
@@ -230,7 +229,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test some exceptional rules, implemented as rewrites.
*/
public void testExceptions() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);

/* rewrite of št -> sk */
assertAnalyzesTo(cz, "český", new String[] { "česk" });
@@ -270,16 +269,16 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
* Test that very short words are not stemmed.
*/
public void testDontStem() throws IOException {
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(cz, "e", new String[] { "e" });
assertAnalyzesTo(cz, "zi", new String[] { "zi" });
}

public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerTokenFilter(
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hole desek")), set));
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new DanishAnalyzer(Version.LUCENE_CURRENT);
new DanishAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "undersøg", "undersøg");
checkOneTermReuse(a, "undersøgelse", "undersøg");
@@ -46,7 +45,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("undersøgelse");
Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT,
DanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");

@@ -29,38 +29,38 @@ import org.apache.lucene.util.Version;

public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception {
Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "Tisch", "tisch");
checkOneTermReuse(a, "Tische", "tisch");
checkOneTermReuse(a, "Tischen", "tisch");
}

public void testExclusionTableBWCompat() throws IOException {
GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT,
new StringReader("Fischen Trinken")));
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
filter.setExclusionSet(set);
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}

public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
GermanStemFilter filter = new GermanStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Fischen Trinken")), set));
assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}

public void testWithKeywordAttributeAndExclusionTable() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("fischen");
CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set1.add("trinken");
set1.add("fischen");
GermanStemFilter filter = new GermanStemFilter(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
new KeywordMarkerTokenFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Fischen Trinken")), set));
filter.setExclusionSet(set1);
assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
@@ -71,7 +71,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "tischen", "tisch");
a.setStemExclusionTable(new String[] { "tischen" });
checkOneTermReuse(a, "tischen", "tischen");
@@ -81,7 +81,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
* these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
*/
public void testGermanSpecials() throws Exception {
GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
// a/o/u + e is equivalent to the umlaut form
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflach");

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;

/**
* Test the German stemmer. The stemming algorithm is known to work less
@@ -40,7 +39,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {

public void testStemming() throws Exception {
Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
// read test cases from external file:
File dataDir = new File(System.getProperty("dataDir", "./bin"));
File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");

@@ -32,7 +32,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
* @throws Exception in case an error occurs
*/
public void testAnalyzer() throws Exception {
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
@@ -48,7 +48,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
}

public void testReusableTokenStream() throws Exception {
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new EnglishAnalyzer(Version.LUCENE_CURRENT);
new EnglishAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "books", "book");
checkOneTermReuse(a, "book", "book");
@@ -46,7 +45,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("books");
Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT,
EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new SpanishAnalyzer(Version.LUCENE_CURRENT);
new SpanishAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chican");
@@ -46,7 +45,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("chicano");
Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT,
SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.fa;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;

/**
* Test the Persian Analyzer
@@ -31,7 +30,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new PersianAnalyzer(Version.LUCENE_CURRENT);
new PersianAnalyzer(TEST_VERSION_CURRENT);
}

/**
@@ -42,7 +41,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbs() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
// active present indicative
assertAnalyzesTo(a, "میخورد", new String[] { "خورد" });
// active preterite indicative
@@ -118,7 +117,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbsDefective() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
// active present indicative
assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
// active preterite indicative
@@ -189,7 +188,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* nouns, removing the plural -ha.
*/
public void testBehaviorNouns() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
assertAnalyzesTo(a, "برگها", new String[] { "برگ" });
}
@@ -199,7 +198,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* (lowercased, etc)
*/
public void testBehaviorNonPersian() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
}

@@ -207,7 +206,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Basic test ensuring that reusableTokenStream works correctly.
*/
public void testReusableTokenStream() throws Exception {
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
assertAnalyzesToReuse(a, "برگها", new String[] { "برگ" });
}
@@ -216,7 +215,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, new String[] { "the", "and", "a" });
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

@@ -22,7 +22,6 @@ import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.util.Version;

/**
* Test the Persian Normalization Filter
@@ -55,7 +54,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
}

private void check(final String input, final String expected) throws IOException {
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(Version.LUCENE_CURRENT,
ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
PersianNormalizationFilter filter = new PersianNormalizationFilter(
tokenStream);

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new FinnishAnalyzer(Version.LUCENE_CURRENT);
new FinnishAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäj");
@@ -46,7 +45,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("edeltäjistään");
Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT,
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**
*
@@ -38,19 +37,19 @@ public class TestElision extends BaseTokenStreamTestCase {

public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
Set articles = new HashSet();
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
Set<String> articles = new HashSet<String>();
articles.add("l");
articles.add("M");
TokenFilter filter = new ElisionFilter(Version.LUCENE_CURRENT, tokenizer, articles);
List tas = filtre(filter);
TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
List<String> tas = filter(filter);
assertEquals("embrouille", tas.get(4));
assertEquals("O'brian", tas.get(6));
assertEquals("enfin", tas.get(7));
}

private List filtre(TokenFilter filter) throws IOException {
List tas = new ArrayList();
private List<String> filter(TokenFilter filter) throws IOException {
List<String> tas = new ArrayList<String>();
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
while (filter.incrementToken()) {
tas.add(termAtt.term());

@@ -32,7 +32,7 @@ import org.apache.lucene.util.Version;
public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {

public void testAnalyzer() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);

assertAnalyzesTo(fa, "", new String[] {
});
@@ -204,7 +204,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
}

public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
// stopwords
assertAnalyzesToReuse(
fa,
@@ -229,27 +229,27 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
fa.setStemExclusionTable(new String[] { "habitable" });
assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
}

public void testExclusionTableViaCtor() throws Exception {
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("habitable");
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT,
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
"chist" });

fa = new FrenchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
}

public void testElision() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
}

@@ -5,7 +5,6 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -31,11 +30,11 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HindiAnalyzer(Version.LUCENE_CURRENT);
new HindiAnalyzer(TEST_VERSION_CURRENT);
}

public void testBasics() throws Exception {
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT);
// two ways to write 'hindi' itself.
checkOneTermReuse(a, "हिन्दी", "हिंद");
checkOneTermReuse(a, "हिंदी", "हिंद");
@@ -44,7 +43,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
public void testExclusionSet() throws Exception {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("हिंदी");
Analyzer a = new HindiAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT,
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

/**
* Test HindiNormalizer
@@ -60,7 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
}
private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

/**
* Test HindiStemmer
@@ -82,7 +81,7 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase {
}

private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new HindiStemFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HungarianAnalyzer(Version.LUCENE_CURRENT);
new HungarianAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "babakocsi", "babakocs");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
@@ -46,7 +45,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("babakocsi");
Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT,
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

/**
* Test IndicNormalizer
@@ -45,7 +44,7 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
}

private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
TokenFilter tf = new IndicNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });

@@ -22,7 +22,6 @@ import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

/**
* Test IndicTokenizer
@@ -30,7 +29,7 @@ import org.apache.lucene.util.Version;
public class TestIndicTokenizer extends BaseTokenStreamTestCase {
/** Test tokenizing Indic vowels, signs, and punctuation */
public void testBasics() throws IOException {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT,
TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("मुझे हिंदी का और अभ्यास करना होगा ।"));
assertTokenStreamContents(ts,
new String[] { "मुझे", "हिंदी", "का", "और", "अभ्यास", "करना", "होगा" });
@@ -38,7 +37,7 @@ public class TestIndicTokenizer extends BaseTokenStreamTestCase {

/** Test that words with format chars such as ZWJ are kept */
public void testFormat() throws Exception {
TokenStream ts = new IndicTokenizer(Version.LUCENE_CURRENT,
TokenStream ts = new IndicTokenizer(TEST_VERSION_CURRENT,
new StringReader("शार्मा शार्मा"));
assertTokenStreamContents(ts, new String[] { "शार्मा", "शार्मा" });
}

@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ItalianAnalyzer(Version.LUCENE_CURRENT);
new ItalianAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "abbandonata", "abbandon");
checkOneTermReuse(a, "abbandonati", "abbandon");
@@ -46,7 +45,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("abbandonata");
Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");

@@ -24,7 +24,6 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

/**
* Verifies the behavior of PatternAnalyzer.
@@ -37,13 +36,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox", "the", "abcd", "dc" });

// split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox", "abcd", "dc" });
@@ -55,13 +54,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });

// Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
@@ -73,12 +72,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), false, null);
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
"Are", "some", "Comma", "separated", "words" });

// split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), true,
PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true,
StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
"some", "comma", "separated", "words" });
@@ -103,7 +102,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
document.append(largeWord2);

// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, document.toString(), new String[] { new String(largeWord),
new String(largeWord2) });

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
@@ -31,7 +30,7 @@ public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase

PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world")),
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")),
new SingleTokenTokenStream(createToken("$", 0, 0)));

assertTokenStreamContents(ts,
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
@@ -42,7 +41,7 @@ public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase {
// prefix and suffix using 2x prefix

ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("hello world")));
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")));
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));

assertTokenStreamContents(ts,
@@ -10,7 +10,6 @@ import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -38,7 +37,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
dictionary.put("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
TokenStream stream = new PorterStemFilter(
new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
assertTokenStreamContents(stream, new String[] { "books" });
}
}
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

import java.io.StringReader;

@@ -31,9 +30,9 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input;

@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
}

public void testInvalidInput() throws Exception {
@@ -92,13 +91,13 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
}

public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}

public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
tokenizer.reset(new StringReader("abcde"));
@@ -29,7 +29,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input;

@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new StringReader("abcde");
}
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

import java.io.StringReader;

@@ -31,9 +30,9 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
private TokenStream input;

@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
}

public void testInvalidInput() throws Exception {
@@ -81,13 +80,13 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
}

public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}

public void testReset() throws Exception {
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abcde"));
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5});
tokenizer.reset(new StringReader("abcde"));
@@ -29,7 +29,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase {
private StringReader input;

@Override
public void setUp() throws Exception {
protected void setUp() throws Exception {
super.setUp();
input = new StringReader("abcde");
}
@@ -127,14 +127,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}

public void testSnowballCorrectness() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "opheffen", "opheff");
checkOneTermReuse(a, "opheffende", "opheff");
checkOneTermReuse(a, "opheffing", "opheff");
}

public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
checkOneTermReuse(a, "lichamelijk", "licham");
checkOneTermReuse(a, "lichamelijke", "licham");
@@ -146,7 +146,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemExclusionTable(new String[] { "lichamelijk" });
checkOneTermReuse(a, "lichamelijk", "lichamelijk");
@@ -157,10 +157,10 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
public void testExclusionTableViaCtor() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
set.add("lichamelijk");
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesToReuse(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });

a = new DutchAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET, set);
a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });

}
@@ -170,7 +170,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testStemDictionaryReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemDictionary(customDictFile);
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
@@ -196,7 +196,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}

private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
}

}
@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new NorwegianAnalyzer(Version.LUCENE_CURRENT);
new NorwegianAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
@@ -46,7 +45,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("havnedistriktene");
Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT,
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
@@ -22,21 +22,15 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;

import java.io.StringReader;


/**
*
*
**/
public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {

public void testPayloads() throws Exception {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)),
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@@ -57,7 +51,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {

String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)),
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@@ -75,7 +69,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {

public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new FloatEncoder());
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -93,7 +87,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {

public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
@@ -38,7 +37,7 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";

NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))), 3, "D");
NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
boolean seenDogs = false;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
@@ -36,7 +35,7 @@ public class TokenOffsetPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";

TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
int count = 0;
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;
@@ -38,7 +37,7 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";

TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))));
TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
int count = 0;
TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
@@ -48,7 +47,6 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
assertTrue("type is null and it shouldn't be", type != null);
assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
count++;
}
@@ -23,18 +23,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new PortugueseAnalyzer(Version.LUCENE_CURRENT);
new PortugueseAnalyzer(TEST_VERSION_CURRENT);
}

/** test stopwords and stemming */
public void testBasics() throws IOException {
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
// stemming
checkOneTermReuse(a, "quilométricas", "quilométr");
checkOneTermReuse(a, "quilométricos", "quilométr");
@@ -46,7 +45,7 @@ public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
public void testExclude() throws IOException {
Set<String> exclusionSet = new HashSet<String>();
exclusionSet.add("quilométricas");
Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT,
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr");
@ -37,7 +37,6 @@ import org.apache.lucene.queryParser.QueryParser;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
|
||||
|
@ -51,7 +50,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dir = new RAMDirectory();
|
||||
appAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
|
||||
appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
|
||||
IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
int numDocs = 200;
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
|
@ -64,7 +63,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
}
|
||||
writer.close();
|
||||
reader = IndexReader.open(dir, true);
|
||||
protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
|
||||
protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,7 +74,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
|
||||
//Helper method to query
|
||||
private int search(Analyzer a, String queryString) throws IOException, ParseException {
|
||||
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
|
||||
Query q = qp.parse(queryString);
|
||||
return new IndexSearcher(reader).search(q, null, 1000).totalHits;
|
||||
}
|
||||
|
@ -157,14 +156,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
if (++invocationCount % 2 == 0)
|
||||
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
|
||||
return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
else
|
||||
return new LetterTokenizer(Version.LUCENE_CURRENT, reader);
|
||||
return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
}
|
||||
}
|
||||
|
||||
public void testWrappingNonReusableAnalyzer() throws Exception {
|
||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());
|
||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
|
||||
a.addStopWords(reader, 10);
|
||||
int numHits = search(a, "repetitiveField:boring");
|
||||
assertTrue(numHits == 0);
|
||||
|
@ -173,7 +172,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testTokenStream() throws Exception {
|
||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
|
||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
|
||||
a.addStopWords(reader, 10);
|
||||
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
|
||||
TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
|
||||
|
|
|
@ -27,9 +27,9 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
|
||||
public void testFilter() throws Exception {
|
||||
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
|
||||
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||
new StringReader("Do have a nice day")); // 1-4 length string
|
||||
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream);
|
||||
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
|
||||
TermAttribute text = filter.getAttribute(TermAttribute.class);
|
||||
assertTrue(filter.incrementToken());
|
||||
assertEquals("oD", text.term());
|
||||
|
@ -45,9 +45,9 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testFilterWithMark() throws Exception {
|
||||
TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
|
||||
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"Do have a nice day")); // 1-4 length string
|
||||
ReverseStringFilter filter = new ReverseStringFilter(Version.LUCENE_CURRENT, stream, '\u0001');
|
||||
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
|
||||
TermAttribute text = filter
|
||||
.getAttribute(TermAttribute.class);
|
||||
assertTrue(filter.incrementToken());
|
||||
|
@ -64,14 +64,14 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testReverseString() throws Exception {
|
||||
assertEquals( "A", ReverseStringFilter.reverse( "A" ) );
|
||||
assertEquals( "BA", ReverseStringFilter.reverse( "AB" ) );
|
||||
assertEquals( "CBA", ReverseStringFilter.reverse( "ABC" ) );
|
||||
assertEquals( "A", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "A" ) );
|
||||
assertEquals( "BA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "AB" ) );
|
||||
assertEquals( "CBA", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "ABC" ) );
|
||||
}
|
||||
|
||||
public void testReverseChar() throws Exception {
|
||||
char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' };
|
||||
ReverseStringFilter.reverse( buffer, 2, 3 );
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 2, 3 );
|
||||
assertEquals( "ABEDCF", new String( buffer ) );
|
||||
}
|
||||
|
||||
|
@ -84,37 +84,37 @@ public class TestReverseStringFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testReverseSupplementary() throws Exception {
|
||||
// supplementary at end
|
||||
assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅"));
|
||||
assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅"));
|
||||
// supplementary at end - 1
|
||||
assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅a"));
|
||||
assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "瀛愯䇹鍟艱𩬅a"));
|
||||
// supplementary at start
|
||||
assertEquals("fedcba𩬅", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "𩬅abcdef"));
|
||||
assertEquals("fedcba𩬅", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "𩬅abcdef"));
|
||||
// supplementary at start + 1
|
||||
assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "z𩬅abcdef"));
|
||||
assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "z𩬅abcdef"));
|
||||
// supplementary medial
|
||||
assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "abcd𩬅efg"));
|
||||
assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(TEST_VERSION_CURRENT, "abcd𩬅efg"));
|
||||
}
|
||||
|
||||
public void testReverseSupplementaryChar() throws Exception {
|
||||
// supplementary at end
|
||||
char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray();
|
||||
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
|
||||
assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer));
|
||||
// supplementary at end - 1
|
||||
buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray();
|
||||
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
|
||||
assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer));
|
||||
// supplementary at start
|
||||
buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray();
|
||||
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
|
||||
assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer));
|
||||
// supplementary at start + 1
|
||||
buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray();
|
||||
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 8);
|
||||
assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer));
|
||||
// supplementary medial
|
||||
buffer = "abc瀛愯𩬅def".toCharArray();
|
||||
ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
|
||||
ReverseStringFilter.reverse(TEST_VERSION_CURRENT, buffer, 3, 7);
|
||||
assertEquals("abcfed𩬅愯瀛", new String(buffer));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,18 +23,17 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new RomanianAnalyzer(Version.LUCENE_CURRENT);
|
||||
new RomanianAnalyzer(TEST_VERSION_CURRENT);
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
public void testBasics() throws IOException {
|
||||
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
|
||||
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT);
|
||||
// stemming
|
||||
checkOneTermReuse(a, "absenţa", "absenţ");
|
||||
checkOneTermReuse(a, "absenţi", "absenţ");
|
||||
|
@ -46,7 +45,7 @@ public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testExclude() throws IOException {
|
||||
Set<String> exclusionSet = new HashSet<String>();
|
||||
exclusionSet.add("absenţa");
|
||||
Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT,
|
||||
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
|
||||
RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTermReuse(a, "absenţa", "absenţa");
|
||||
checkOneTermReuse(a, "absenţi", "absenţ");
|
||||
|
|
|
@ -44,8 +44,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
|
|||
private File dataDir;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception
|
||||
{
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||
}
|
||||
|
@ -71,7 +70,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
|
|||
TokenStream in = ra.tokenStream("all", inWords);
|
||||
|
||||
RussianLetterTokenizer sample =
|
||||
new RussianLetterTokenizer(Version.LUCENE_CURRENT,
|
||||
new RussianLetterTokenizer(TEST_VERSION_CURRENT,
|
||||
sampleUnicode);
|
||||
|
||||
TermAttribute text = in.getAttribute(TermAttribute.class);
|
||||
|
@ -98,7 +97,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
|
|||
public void testDigitsInRussianCharset()
|
||||
{
|
||||
Reader reader = new StringReader("text 1000");
|
||||
RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
|
||||
RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
|
||||
TokenStream stream = ra.tokenStream("", reader);
|
||||
|
||||
TermAttribute termText = stream.getAttribute(TermAttribute.class);
|
||||
|
@ -126,7 +125,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
|
|||
}
|
||||
|
||||
public void testReusableTokenStream() throws Exception {
|
||||
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
|
||||
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
|
||||
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
|
||||
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
|
||||
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
|
||||
|
@ -135,9 +134,9 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
|
|||
|
||||
|
||||
public void testWithStemExclusionSet() throws Exception {
|
||||
CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
|
||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("представление");
|
||||
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
|
||||
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
|
||||
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
|
||||
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
|
||||
|
||||
|
|
|
@ -30,8 +30,8 @@ import java.util.ArrayList;
|
|||
@Deprecated
|
||||
public class TestRussianStem extends LuceneTestCase
|
||||
{
|
||||
private ArrayList words = new ArrayList();
|
||||
private ArrayList stems = new ArrayList();
|
||||
private ArrayList<String> words = new ArrayList<String>();
|
||||
private ArrayList<String> stems = new ArrayList<String>();
|
||||
|
||||
public TestRussianStem(String name)
|
||||
{
|
||||
|
@ -42,8 +42,7 @@ public class TestRussianStem extends LuceneTestCase
|
|||
* @see TestCase#setUp()
|
||||
*/
|
||||
@Override
|
||||
protected void setUp() throws Exception
|
||||
{
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
//System.out.println(new java.util.Date());
|
||||
String str;
|
||||
|
@ -75,15 +74,6 @@ public class TestRussianStem extends LuceneTestCase
|
|||
inStems.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see TestCase#tearDown()
|
||||
*/
|
||||
@Override
|
||||
protected void tearDown() throws Exception
|
||||
{
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testStem()
|
||||
{
|
||||
for (int i = 0; i < words.size(); i++)
|
||||
|
@ -91,7 +81,7 @@ public class TestRussianStem extends LuceneTestCase
|
|||
//if ( (i % 100) == 0 ) System.err.println(i);
|
||||
String realStem =
|
||||
RussianStemmer.stemWord(
|
||||
(String) words.get(i));
|
||||
words.get(i));
|
||||
assertEquals("unicode", stems.get(i), realStem);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreDoc;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* A test class for ShingleAnalyzerWrapper as regards queries and scoring.
|
||||
|
@ -86,7 +85,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
|
||||
searcher = setUpSearcher(analyzer);
|
||||
|
||||
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
|
||||
|
||||
Query q = qp.parse(qs);
|
||||
|
||||
|
@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
|
||||
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
|
||||
"test sentence");
|
||||
int[] ranks = new int[] { 1, 2, 0 };
|
||||
compareRanks(hits, ranks);
|
||||
|
@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
|
||||
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
|
||||
"\"this sentence\"");
|
||||
int[] ranks = new int[] { 0 };
|
||||
compareRanks(hits, ranks);
|
||||
|
@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
|
||||
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
|
||||
"\"test sentence\"");
|
||||
int[] ranks = new int[] { 1 };
|
||||
compareRanks(hits, ranks);
|
||||
|
@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2),
|
||||
(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
|
||||
"+test +sentence");
|
||||
int[] ranks = new int[] { 1, 2 };
|
||||
compareRanks(hits, ranks);
|
||||
|
@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
* This shows how to construct a phrase query containing shingles.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
|
||||
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
|
||||
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
|
||||
searcher = setUpSearcher(analyzer);
|
||||
|
||||
PhraseQuery q = new PhraseQuery();
|
||||
|
@ -178,7 +177,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
* in the right order and adjacent to each other.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
|
||||
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
|
||||
Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
|
||||
searcher = setUpSearcher(analyzer);
|
||||
|
||||
BooleanQuery q = new BooleanQuery();
|
||||
|
@ -200,7 +199,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testReusableTokenStream() throws Exception {
|
||||
Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), 2);
|
||||
Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
|
||||
assertAnalyzesToReuse(a, "please divide into shingles",
|
||||
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
|
||||
new int[] { 0, 0, 7, 7, 14, 14, 19 },
|
||||
|
@ -222,9 +221,9 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
if (++invocationCount % 2 == 0)
|
||||
return new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
|
||||
return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
else
|
||||
return new LetterTokenizer(Version.LUCENE_CURRENT, reader);
|
||||
return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -249,7 +248,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNonDefaultMinShingleSize() throws Exception {
|
||||
ShingleAnalyzerWrapper analyzer
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 4);
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
|
||||
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
|
||||
new String[] { "please", "please divide this", "please divide this sentence",
|
||||
"divide", "divide this sentence", "divide this sentence into",
|
||||
|
@ -273,7 +272,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
|
||||
ShingleAnalyzerWrapper analyzer
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 3, 3);
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
|
||||
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
|
||||
new String[] { "please", "please divide this",
|
||||
"divide", "divide this sentence",
|
||||
|
@ -297,7 +296,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNoTokenSeparator() throws Exception {
|
||||
ShingleAnalyzerWrapper analyzer
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
|
||||
analyzer.setTokenSeparator("");
|
||||
assertAnalyzesToReuse(analyzer, "please divide into shingles",
|
||||
new String[] { "please", "pleasedivide",
|
||||
|
@ -319,7 +318,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNullTokenSeparator() throws Exception {
|
||||
ShingleAnalyzerWrapper analyzer
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
|
||||
analyzer.setTokenSeparator(null);
|
||||
assertAnalyzesToReuse(analyzer, "please divide into shingles",
|
||||
new String[] { "please", "pleasedivide",
|
||||
|
@ -340,7 +339,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
}
|
||||
public void testAltTokenSeparator() throws Exception {
|
||||
ShingleAnalyzerWrapper analyzer
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer());
|
||||
= new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
|
||||
analyzer.setTokenSeparator("<SEP>");
|
||||
assertAnalyzesToReuse(analyzer, "please divide into shingles",
|
||||
new String[] { "please", "please<SEP>divide",
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class ShingleFilterTest extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -836,7 +835,7 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase {
|
|||
|
||||
|
||||
public void testReset() throws Exception {
|
||||
Tokenizer wsTokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("please divide this sentence"));
|
||||
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
|
||||
TokenStream filter = new ShingleFilter(wsTokenizer, 2);
|
||||
assertTokenStreamContents(filter,
|
||||
new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
|
||||
|
|
|
@ -31,7 +31,6 @@ import org.apache.lucene.analysis.payloads.PayloadHelper;
|
|||
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
|
||||
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -41,11 +40,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testIterator() throws IOException {
|
||||
|
||||
WhitespaceTokenizer wst = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("one two three four five"));
|
||||
WhitespaceTokenizer wst = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("one two three four five"));
|
||||
ShingleMatrixFilter smf = new ShingleMatrixFilter(wst, 2, 2, '_', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
|
||||
|
||||
int i;
|
||||
for(i=0; smf.incrementToken(); i++);
|
||||
for(i=0; smf.incrementToken(); i++) {}
|
||||
assertEquals(4, i);
|
||||
|
||||
// call next once more. this should return false again rather than throwing an exception (LUCENE-1939)
|
||||
|
@ -65,11 +64,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
assertFalse(ts.incrementToken());
|
||||
|
||||
TokenListStream tls;
|
||||
LinkedList tokens;
|
||||
LinkedList<Token> tokens;
|
||||
|
||||
// test a plain old token stream with synonyms translated to rows.
|
||||
|
||||
tokens = new LinkedList();
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens.add(createToken("please", 0, 6));
|
||||
tokens.add(createToken("divide", 7, 13));
|
||||
tokens.add(createToken("this", 14, 18));
|
||||
|
@ -101,11 +100,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
TokenStream ts;
|
||||
TokenStream tls;
|
||||
LinkedList tokens;
|
||||
LinkedList<Token> tokens;
|
||||
|
||||
// test a plain old token stream with synonyms tranlated to rows.
|
||||
|
||||
tokens = new LinkedList();
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens.add(tokenFactory("hello", 1, 0, 4));
|
||||
tokens.add(tokenFactory("greetings", 0, 0, 4));
|
||||
tokens.add(tokenFactory("world", 1, 5, 10));
|
||||
|
@ -145,7 +144,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
|
||||
|
||||
tokens = new LinkedList();
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
|
||||
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
|
@ -286,7 +285,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
//
|
||||
|
||||
|
||||
tokens = new LinkedList();
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
|
||||
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
|
||||
|
@ -413,11 +412,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
}
|
||||
|
||||
private Token tokenFactory(String text, int startOffset, int endOffset) {
|
||||
return tokenFactory(text, 1, 1f, startOffset, endOffset);
|
||||
}
|
||||
|
||||
|
||||
private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) {
|
||||
Token token = new Token(startOffset, endOffset);
|
||||
token.setTermBuffer(text);
|
||||
|
@ -430,10 +424,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
return tokenFactory(text, posIncr, 1f, 0, 0);
|
||||
}
|
||||
|
||||
private Token tokenFactory(String text, int posIncr, float weight) {
|
||||
return tokenFactory(text, posIncr, weight, 0, 0);
|
||||
}
|
||||
|
||||
private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) {
|
||||
Token token = new Token(startOffset, endOffset);
|
||||
token.setTermBuffer(text);
|
||||
|
@ -460,17 +450,6 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
assertEquals(text, termAtt.term());
|
||||
}
|
||||
|
||||
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost) throws IOException {
|
||||
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
|
||||
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
|
||||
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals(text, termAtt.term());
|
||||
assertEquals(positionIncrement, posIncrAtt.getPositionIncrement());
|
||||
assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0);
|
||||
}
|
||||
|
||||
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException {
|
||||
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
|
||||
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -505,7 +484,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
public static class TokenListStream extends TokenStream {
|
||||
|
||||
private Collection tokens;
|
||||
private Collection<Token> tokens;
|
||||
TermAttribute termAtt;
|
||||
PositionIncrementAttribute posIncrAtt;
|
||||
PayloadAttribute payloadAtt;
|
||||
|
@ -513,7 +492,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
TypeAttribute typeAtt;
|
||||
FlagsAttribute flagsAtt;
|
||||
|
||||
public TokenListStream(Collection tokens) {
|
||||
public TokenListStream(Collection<Token> tokens) {
|
||||
this.tokens = tokens;
|
||||
termAtt = addAttribute(TermAttribute.class);
|
||||
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -523,7 +502,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
flagsAtt = addAttribute(FlagsAttribute.class);
|
||||
}
|
||||
|
||||
private Iterator iterator;
|
||||
private Iterator<Token> iterator;
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
|
@ -533,7 +512,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
|
|||
if (!iterator.hasNext()) {
|
||||
return false;
|
||||
}
|
||||
Token prototype = (Token) iterator.next();
|
||||
Token prototype = iterator.next();
|
||||
clearAttributes();
|
||||
termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
|
||||
posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());
|
||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.TeeSinkTokenFilter;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -37,7 +36,7 @@ public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
|
|||
public void test() throws IOException {
|
||||
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
|
||||
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
|
||||
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
|
||||
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
|
||||
SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
|
||||
int count = 0;
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.TeeSinkTokenFilter;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -35,7 +34,7 @@ public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
|
|||
public void test() throws IOException {
|
||||
TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
|
||||
String test = "The quick red fox jumped over the lazy brown dogs";
|
||||
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test)));
|
||||
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
|
||||
SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
|
||||
|
||||
int count = 0;
|
||||
|
|
|
@ -27,11 +27,9 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
|
|||
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
|
||||
|
||||
|
||||
public TokenTypeSinkTokenizerTest(String s) {
|
||||
super(s);
|
||||
}
|
||||
|
@ -40,7 +38,7 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
|
|||
TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
|
||||
String test = "The quick red fox jumped over the lazy brown dogs";
|
||||
|
||||
TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(test))));
|
||||
TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
|
||||
SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
|
||||
|
||||
boolean seenDogs = false;
|
||||
|
|
|
TestSnowball.java

@@ -33,13 +33,13 @@ import org.apache.lucene.util.Version;
 public class TestSnowball extends BaseTokenStreamTestCase {
 
   public void testEnglish() throws Exception {
-    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
+    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
     assertAnalyzesTo(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
   }
 
   public void testStopwords() throws Exception {
-    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
+    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English",
         StandardAnalyzer.STOP_WORDS_SET);
     assertAnalyzesTo(a, "the quick brown fox jumped",
         new String[]{"quick", "brown", "fox", "jump"});
@@ -50,7 +50,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
    * we lowercase I correct for non-Turkish languages in either case.
    */
   public void testEnglishLowerCase() throws Exception {
-    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
+    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
     assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" });
     assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" });
 
@@ -63,7 +63,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
    * Test turkish lowercasing
    */
   public void testTurkish() throws Exception {
-    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "Turkish");
+    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");
 
     assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
     assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
@@ -84,7 +84,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
 
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
+    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
     assertAnalyzesToReuse(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
     assertAnalyzesToReuse(a, "she abhorred him",
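
For reference (not part of the diff above), this is the shape these analyzer tests take after the change: the test class inherits the shared static TEST_VERSION_CURRENT constant through BaseTokenStreamTestCase/LuceneTestCase and passes it wherever a Version argument is needed, instead of hard-coding Version.LUCENE_CURRENT. The sketch below is illustrative only; the class name is invented, and the SnowballAnalyzer import path is assumed from the contrib snowball module.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;

// Illustrative sketch, not a file from this commit.
public class ExampleSnowballTest extends BaseTokenStreamTestCase {

  public void testEnglishStemming() throws Exception {
    // TEST_VERSION_CURRENT is the release version shared by all tests,
    // inherited from LuceneTestCase via BaseTokenStreamTestCase.
    Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
    assertAnalyzesTo(a, "he abhorred accents",
        new String[] { "he", "abhor", "accent" });
  }
}
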
TestSwedishAnalyzer.java

@@ -23,18 +23,17 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 
 public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new SwedishAnalyzer(Version.LUCENE_CURRENT);
+    new SwedishAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
     checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
@@ -46,7 +45,7 @@ public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
   public void testExclude() throws IOException {
     Set<String> exclusionSet = new HashSet<String>();
     exclusionSet.add("jaktkarlarne");
-    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT,
+    Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
         SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
     checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
TestThaiAnalyzer.java

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.th;
  */
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 
 /**
  * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@@ -32,7 +31,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * testcase for offsets
    */
   public void testOffsets() throws Exception {
-    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์",
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์",
         new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
         new int[] { 0, 2, 7, 9, 12 },
         new int[] { 2, 7, 9, 12, 17});
@@ -50,7 +49,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
    */
   public void testBuggyTokenType() throws Exception {
-    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
         new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
         new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
   }
@@ -64,7 +63,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    */
 
   public void testAnalyzer() throws Exception {
-    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
 
     assertAnalyzesTo(analyzer, "", new String[] {});
 
@@ -89,7 +88,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * Test that position increments are adjusted correctly for stopwords.
    */
   public void testPositionIncrements() throws Exception {
-    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
 
     assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
         new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
@@ -106,7 +105,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
     assertAnalyzesToReuse(analyzer, "", new String[] {});
 
     assertAnalyzesToReuse(
TestTurkishAnalyzer.java

@@ -23,18 +23,17 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
 
 public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new TurkishAnalyzer(Version.LUCENE_CURRENT);
+    new TurkishAnalyzer(TEST_VERSION_CURRENT);
   }
 
   /** test stopwords and stemming */
   public void testBasics() throws IOException {
-    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
+    Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT);
     // stemming
     checkOneTermReuse(a, "ağacı", "ağaç");
     checkOneTermReuse(a, "ağaç", "ağaç");
@@ -46,7 +45,7 @@ public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
   public void testExclude() throws IOException {
     Set<String> exclusionSet = new HashSet<String>();
     exclusionSet.add("ağacı");
-    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT,
+    Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
         TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "ağacı", "ağacı");
     checkOneTermReuse(a, "ağaç", "ağaç");
TestTurkishLowerCaseFilter.java

@@ -22,7 +22,6 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.util.Version;
 
 /**
  * Test the Turkish lowercase filter.
@@ -33,7 +32,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
    * Test composed forms
    */
   public void testTurkishLowerCaseFilter() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@@ -44,7 +43,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
    * Test decomposed forms
    */
   public void testDecomposed() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
@@ -57,7 +56,7 @@ public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
    * to U+0130 + U+0316, and is lowercased the same way.
    */
   public void testDecomposed2() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader(
+    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
         "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
IndexTaskTest.java

@@ -20,8 +20,6 @@ package org.apache.lucene.ant;
 import java.io.File;
 import java.io.IOException;
 
-import junit.framework.TestCase;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.queryParser.QueryParser;
@@ -31,13 +29,13 @@ import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.tools.ant.Project;
 import org.apache.tools.ant.types.FileSet;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.util.LuceneTestCase;
 
 /**
  * Test cases for index task
  *
  */
-public class IndexTaskTest extends TestCase {
+public class IndexTaskTest extends LuceneTestCase {
     private final static String docHandler =
             "org.apache.lucene.ant.FileExtensionDocumentHandler";
 
@@ -55,7 +53,8 @@ public class IndexTaskTest extends TestCase {
      *@exception IOException Description of Exception
      */
     @Override
-    public void setUp() throws Exception {
+    protected void setUp() throws Exception {
        super.setUp();
        Project project = new Project();
+
        IndexTask task = new IndexTask();
@@ -71,12 +70,12 @@ public class IndexTaskTest extends TestCase {
 
        dir = FSDirectory.open(indexDir);
        searcher = new IndexSearcher(dir, true);
-       analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
+       analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
     }
 
 
     public void testSearch() throws Exception {
-        Query query = new QueryParser(Version.LUCENE_CURRENT, "contents",analyzer).parse("test");
+        Query query = new QueryParser(TEST_VERSION_CURRENT, "contents",analyzer).parse("test");
 
         int numHits = searcher.search(query, null, 1000).totalHits;
 
@@ -88,9 +87,10 @@ public class IndexTaskTest extends TestCase {
      * TODO: remove indexDir?
      */
     @Override
-    public void tearDown() throws IOException {
+    protected void tearDown() throws Exception {
         searcher.close();
         dir.close();
+        super.tearDown();
     }
 }
 
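
For reference (again, not part of the diff): the IndexTaskTest change follows the usual pattern of this era, where a JUnit 3-style test extends LuceneTestCase rather than junit.framework.TestCase and chains its @Override'd setUp()/tearDown() to the superclass so the shared test bookkeeping runs. The sketch below is illustrative; the class name and field are invented, and the protected lifecycle signatures assume the JUnit 3-style LuceneTestCase of this period.

import org.apache.lucene.util.LuceneTestCase;

// Illustrative sketch, not a file from this commit.
public class ExampleTaskTest extends LuceneTestCase {

  private String resource;   // hypothetical per-test state

  @Override
  protected void setUp() throws Exception {
    super.setUp();           // let LuceneTestCase initialize first
    resource = "ready";
  }

  @Override
  protected void tearDown() throws Exception {
    resource = null;         // release per-test state
    super.tearDown();        // then let LuceneTestCase clean up
  }

  public void testResourceAvailable() {
    assertEquals("ready", resource);
  }
}
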
Some files were not shown because too many files have changed in this diff.