LUCENE-6271: sync up with trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1670257 13f79535-47bb-0310-9956-ffa450edef68
Ryan Ernst 2015-03-31 05:22:40 +00:00
commit 05cf3fde0d
1088 changed files with 44832 additions and 16480 deletions

View File

@ -2,7 +2,7 @@
<library name="JUnit">
<CLASSES>
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.9.jar!/" />
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.12.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

View File

@ -16,7 +16,7 @@
import os
import sys
sys.path.append(os.path.dirname(__file__))
import scriptutil
from scriptutil import *
import argparse
import io
@ -36,7 +36,7 @@ def update_changes(filename, new_version):
buffer.append(line)
return match is not None
changed = scriptutil.update_file(filename, matcher, edit)
changed = update_file(filename, matcher, edit)
print('done' if changed else 'uptodate')
def add_constant(new_version, deprecate):
@ -90,7 +90,7 @@ def add_constant(new_version, deprecate):
buffer.append(line)
return False
changed = scriptutil.update_file(filename, matcher, Edit())
changed = update_file(filename, matcher, Edit())
print('done' if changed else 'uptodate')
version_prop_re = re.compile('version\.base=(.*)')
@ -103,7 +103,7 @@ def update_build_version(new_version):
buffer.append('version.base=' + new_version.dot + '\n')
return True
changed = scriptutil.update_file(filename, version_prop_re, edit)
changed = update_file(filename, version_prop_re, edit)
print('done' if changed else 'uptodate')
def update_latest_constant(new_version):
@ -116,7 +116,7 @@ def update_latest_constant(new_version):
buffer.append(line.rpartition('=')[0] + ('= %s;\n' % new_version.constant))
return True
changed = scriptutil.update_file(filename, matcher, edit)
changed = update_file(filename, matcher, edit)
print('done' if changed else 'uptodate')
def update_example_solrconfigs(new_version):
@ -139,7 +139,7 @@ def update_solrconfig(filename, matcher, new_version):
buffer.append(line.replace(match.group(1), new_version.dot))
return True
changed = scriptutil.update_file(filename, matcher, edit)
changed = update_file(filename, matcher, edit)
print('done' if changed else 'uptodate')
def check_lucene_version_tests():
@ -165,7 +165,7 @@ def read_config():
parser.add_argument('-r', '--downstream-repo', help='Path to downstream checkout for given changeid')
c = parser.parse_args()
c.branch_type = scriptutil.find_branch_type()
c.branch_type = find_branch_type()
c.matching_branch = c.version.is_bugfix_release() and c.branch_type == 'release' or \
c.version.is_minor_release() and c.branch_type == 'stable' or \
c.branch_type == 'major'

View File

@ -48,8 +48,28 @@ New Features
* LUCENE-6227: Added BooleanClause.Occur.FILTER to filter documents without
participating in scoring (on the contrary to MUST). (Adrien Grand)
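A minimal sketch (not part of this commit) of how the new FILTER clause might be used, assuming the mutable BooleanQuery API still present on trunk at this point; the field names, values and the searcher variable are illustrative only:

  // The range clause restricts matches but contributes nothing to the score;
  // only the MUST clause participates in scoring.
  BooleanQuery bq = new BooleanQuery();
  bq.add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST);
  bq.add(NumericRangeQuery.newLongRange("date", 20150101L, 20151231L, true, true),
         BooleanClause.Occur.FILTER);
  TopDocs hits = searcher.search(bq, 10);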
* LUCENE-6294: Added oal.search.CollectorManager to allow for parallelization
of the document collection process on IndexSearcher. (Adrien Grand)
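As an illustration only (not taken from this commit), a CollectorManager that counts hits in parallel could look roughly like this, assuming the newCollector()/reduce() contract described above and an IndexSearcher created with an ExecutorService:

  CollectorManager<TotalHitCountCollector, Integer> manager =
      new CollectorManager<TotalHitCountCollector, Integer>() {
        @Override
        public TotalHitCountCollector newCollector() {
          // one collector per slice/thread
          return new TotalHitCountCollector();
        }
        @Override
        public Integer reduce(Collection<TotalHitCountCollector> collectors) {
          int total = 0;
          for (TotalHitCountCollector c : collectors) {
            total += c.getTotalHits();
          }
          return total;
        }
      };
  int totalHits = searcher.search(query, manager);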
* LUCENE-6303: Added filter caching baked into IndexSearcher, disabled by
default. (Adrien Grand)
* LUCENE-6304: Added a new MatchNoDocsQuery that matches no documents.
(Lee Hinman via Adrien Grand)
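For example (purely illustrative; the parsing helper is hypothetical), the new query is a convenient non-null stand-in when a parse produces nothing to match:

  Query parseOrNone(String text) {
    Query parsed = parse(text);               // hypothetical parsing helper
    return parsed == null ? new MatchNoDocsQuery() : parsed;
  }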
* LUCENE-6341: Add a -fast option to CheckIndex. (Robert Muir)
* LUCENE-6355: IndexWriter's infoStream now also logs time to write FieldInfos
during merge (Lee Hinman via Mike McCandless)
* LUCENE-6339: Added Near-real time Document Suggester via custom postings format
(Areek Zillur, Mike McCandless, Simon Willnauer)
Bug Fixes
* LUCENE-6368: FST.save can truncate output (BufferedOutputStream may be closed
after the underlying stream). (Ippei Matsushima via Dawid Weiss)
* LUCENE-6249: StandardQueryParser doesn't support pure negative clauses.
(Dawid Weiss)
@ -59,6 +79,16 @@ Bug Fixes
* LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when
object alignment was different from 8. (Uwe Schindler, Adrien Grand)
* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand)
* LUCENE-6001: DrillSideways hits NullPointerException for certain
BooleanQuery searches. (Dragan Jotannovic, jane chang via Mike
McCandless)
* LUCENE-6311: Fix NIOFSDirectory and SimpleFSDirectory so that the
toString method of IndexInputs confess when they are from a compound
file. (Robert Muir, Mike McCandless)
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
@ -84,10 +114,10 @@ Optimizations
* LUCENE-6233 Speed up CheckIndex when the index has term vectors
(Robert Muir, Mike McCandless)
* LUCENE-6198: Added the TwoPhaseDocIdSetIterator API, exposed on scorers which
* LUCENE-6198: Added the TwoPhaseIterator API, exposed on scorers which
is for now only used on phrase queries and conjunctions in order to check
positions lazily if the phrase query is in a conjunction with other queries.
(Robert Muir, Adrien Grand)
(Robert Muir, Adrien Grand, David Smiley)
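A rough consumer-side sketch of the two-phase pattern (not from this commit): iterate the cheap approximation, then confirm each candidate with matches(). The asTwoPhaseIterator() accessor and the null check are assumptions about the trunk API at this point:

  TwoPhaseIterator twoPhase = scorer.asTwoPhaseIterator();
  if (twoPhase != null) {                      // null when no cheaper approximation exists
    DocIdSetIterator approximation = twoPhase.approximation();
    for (int doc = approximation.nextDoc();
         doc != DocIdSetIterator.NO_MORE_DOCS;
         doc = approximation.nextDoc()) {
      if (twoPhase.matches()) {
        // doc is a verified match; positions were checked lazily
      }
    }
  }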
* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
minShouldMatch > 1 now either propagate or take advantage of the two-phase
@ -108,6 +138,19 @@ Optimizations
in order to advance doc IDs, which takes advantage of the cost() API.
(Adrien Grand)
* LUCENE-6290: QueryWrapperFilter propagates approximations and FilteredQuery
rewrites to a BooleanQuery when the filter is a QueryWrapperFilter in order
to leverage approximations. (Adrien Grand)
* LUCENE-6318: Reduce RAM usage of FieldInfos when there are many fields.
(Mike McCandless, Robert Muir)
* LUCENE-6320: Speed up CheckIndex. (Robert Muir)
* LUCENE-4942: Optimized the encoding of PrefixTreeStrategy indexes for
non-point data: 33% smaller index, 68% faster indexing, and 44% faster
searching. YMMV (David Smiley)
API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
@ -146,6 +189,9 @@ API Changes
* LUCENE-6268: Replace FieldValueFilter and DocValuesRangeFilter with equivalent
queries that support approximations. (Adrien Grand)
* LUCENE-6289: Replace DocValuesRangeFilter with DocValuesRangeQuery which
supports approximations. (Adrien Grand)
* LUCENE-6266: Remove unnecessary Directory params from SegmentInfo.toString,
SegmentInfos.files/toString, and SegmentCommitInfo.toString. (Robert Muir)
@ -159,6 +205,24 @@ API Changes
* LUCENE-6286: Removed IndexSearcher methods that take a Filter object.
A BooleanQuery with a filter clause must be used instead. (Adrien Grand)
* LUCENE-6300: PrefixFilter, TermRangeFilter and NumericRangeFilter have been
removed. Use PrefixQuery, TermRangeQuery and NumericRangeQuery instead.
(Adrien Grand)
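As a migration illustration only (field names invented), each removed filter maps onto its query counterpart attached as a non-scoring FILTER clause:

  // before: new PrefixFilter(new Term("path", "/docs/"))
  BooleanQuery bq = new BooleanQuery();
  bq.add(userQuery, BooleanClause.Occur.MUST);
  bq.add(new PrefixQuery(new Term("path", "/docs/")), BooleanClause.Occur.FILTER);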
* LUCENE-6303: Replaced FilterCache with QueryCache and CachingWrapperFilter
with CachingWrapperQuery. (Adrien Grand)
* LUCENE-6317: Deprecate DataOutput.writeStringSet and writeStringStringMap.
Use writeSetOfStrings/Maps instead. (Mike McCandless, Robert Muir)
* LUCENE-6307: Rename SegmentInfo.getDocCount -> .maxDoc,
SegmentInfos.totalDocCount -> .totalMaxDoc, MergeInfo.totalDocCount
-> .totalMaxDoc and MergePolicy.OneMerge.totalDocCount ->
.totalMaxDoc (Adrien Grand, Robert Muir, Mike McCandless)
* LUCENE-6367: PrefixQuery now subclasses AutomatonQuery, removing the
specialized PrefixTermsEnum. (Robert Muir, Mike McCandless)
Other
* LUCENE-6248: Remove unused odd constants from StandardSyntaxParser.jj
@ -172,11 +236,20 @@ Other
* LUCENE-6292: Seed StringHelper better. (Robert Muir)
* LUCENE-6333: Refactored queries to delegate their equals and hashcode
impls to the super class. (Lee Hinman via Adrien Grand)
* LUCENE-6343: DefaultSimilarity javadocs had the wrong float value to
demonstrate precision of encoded norms (András Péteri via Mike McCandless)
Changes in Runtime Behavior
* LUCENE-6255: PhraseQuery now ignores leading holes and requires that
positions are positive and added in order. (Adrien Grand)
* LUCENE-6298: SimpleQueryParser returns an empty query rather than
null, if e.g. the terms were all stopwords. (Lee Hinman via Robert Muir)
======================= Lucene 5.0.0 =======================
New Features
@ -705,6 +778,40 @@ Bug fixes
sorted (set) doc values instance at the same time.
(Tom Shally, Robert Muir, Adrien Grand)
* LUCENE-6093: Don't throw NullPointerException from
BlendedInfixSuggester for lookups that do not end in a prefix
token. (jane chang via Mike McCandless)
* LUCENE-6279: Don't let an abusive leftover _N_upgraded.si in the
index directory cause index corruption on upgrade (Robert Muir, Mike
McCandless)
* LUCENE-6287: Fix concurrency bug in IndexWriter that could cause
index corruption (missing _N.si files) the first time 4.x kisses a
3.x index if merges are also running. (Simon Willnauer, Mike
McCandless)
* LUCENE-6205: Fixed intermittent concurrency issue that could cause
FileNotFoundException when writing doc values updates at the same
time that a merge kicks off. (Mike McCandless)
* LUCENE-6214: Fixed IndexWriter deadlock when one thread is
committing while another opens a near-real-time reader and an
unrecoverable (tragic) exception is hit. (Simon Willnauer, Mike
McCandless)
* LUCENE-6105: Don't cache FST root arcs if the number of root arcs is
small, or if the cache would be > 20% of the size of the FST.
(Robert Muir, Mike McCandless)
* LUCENE-6001: DrillSideways hits NullPointerException for certain
BooleanQuery searches. (Dragan Jotannovic, jane chang via Mike
McCandless)
* LUCENE-6306: Merging of doc values and norms now checks whether the
merge was aborted so IndexWriter.rollback can more promptly abort a
running merge. (Robert Muir, Mike McCandless)
API Changes
* LUCENE-6212: Deprecate IndexWriter APIs that accept per-document Analyzer.

View File

@ -181,9 +181,9 @@ public final class StemmerOverrideFilter extends TokenFilter {
charsSpare.grow(length);
final char[] buffer = charsSpare.chars();
for (int i = 0; i < length; ) {
i += Character.toChars(
Character.toLowerCase(
Character.codePointAt(input, i)), buffer, i);
i += Character.toChars(
Character.toLowerCase(
Character.codePointAt(input, i)), buffer, i);
}
spare.copyChars(buffer, 0, length);
} else {

View File

@ -138,7 +138,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
};
}
try {
try (Analyzer a = analyzer) {
String formatClass = format;
if (format == null || format.equals("solr")) {
formatClass = SolrSynonymParser.class.getName();
@ -146,7 +146,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
formatClass = WordnetSynonymParser.class.getName();
}
// TODO: expose dedup as a parameter?
map = loadSynonyms(loader, formatClass, true, analyzer);
map = loadSynonyms(loader, formatClass, true, a);
} catch (ParseException e) {
throw new IOException("Error parsing synonyms file:", e);
}

View File

@ -31,7 +31,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ArabicAnalyzer();
new ArabicAnalyzer().close();
}
/**
@ -53,6 +53,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "ما ملكت أيمانكم", new String[] { "ملكت", "ايمانكم"});
assertAnalyzesTo(a, "الذين ملكت أيمانكم", new String[] { "ملكت", "ايمانكم" }); // stopwords
a.close();
}
/**
@ -62,14 +63,17 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
ArabicAnalyzer a = new ArabicAnalyzer();
assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
a.close();
}
/**
* Non-arabic text gets treated in a similar way as SimpleAnalyzer.
*/
public void testEnglishInput() throws Exception {
assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] {
ArabicAnalyzer a = new ArabicAnalyzer();
assertAnalyzesTo(a, "English text.", new String[] {
"english", "text" });
a.close();
}
/**
@ -80,6 +84,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
ArabicAnalyzer a = new ArabicAnalyzer(set);
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
a.close();
}
public void testWithStemExclusionSet() throws IOException {
@ -87,15 +92,18 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
a.close();
a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new ArabicAnalyzer(), 1000*RANDOM_MULTIPLIER);
ArabicAnalyzer a = new ArabicAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

View File

@ -104,6 +104,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -141,5 +141,6 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -32,24 +32,27 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new BulgarianAnalyzer();
new BulgarianAnalyzer().close();
}
public void testStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer();
assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"});
a.close();
}
public void testCustomStopwords() throws IOException {
Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "Как се казваш?",
new String[] {"как", "се", "казваш"});
a.close();
}
public void testReusableTokenStream() throws IOException {
Analyzer a = new BulgarianAnalyzer();
assertAnalyzesTo(a, "документи", new String[] {"документ"});
assertAnalyzesTo(a, "документ", new String[] {"документ"});
a.close();
}
/**
@ -64,6 +67,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "компютър", new String[] {"компютр"});
assertAnalyzesTo(a, "градове", new String[] {"град"});
a.close();
}
public void testWithStemExclusionSet() throws IOException {
@ -71,10 +75,13 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
set.add("строеве");
Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new BulgarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
BulgarianAnalyzer a = new BulgarianAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

View File

@ -97,6 +97,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "братя", new String[] {"брат"});
assertAnalyzesTo(a, "братята", new String[] {"брат"});
assertAnalyzesTo(a, "брате", new String[] {"брат"});
a.close();
}
/**
@ -109,6 +111,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "вестта", new String[] {"вест"});
assertAnalyzesTo(a, "вести", new String[] {"вест"});
assertAnalyzesTo(a, "вестите", new String[] {"вест"});
a.close();
}
/**
@ -138,6 +142,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "изключенията", new String[] {"изключени"});
/* note the below form in this example does not conflate with the rest */
assertAnalyzesTo(a, "изключения", new String[] {"изключн"});
a.close();
}
/**
@ -154,6 +160,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "красивото", new String[] {"красив"});
assertAnalyzesTo(a, "красиви", new String[] {"красив"});
assertAnalyzesTo(a, "красивите", new String[] {"красив"});
a.close();
}
/**
@ -212,6 +219,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
/* note the below forms conflate with each other, but not the rest */
assertAnalyzesTo(a, "строя", new String[] {"стр"});
assertAnalyzesTo(a, "строят", new String[] {"стр"});
a.close();
}
public void testWithKeywordAttribute() throws IOException {
@ -234,5 +243,6 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -135,12 +135,14 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
checkReuse(a, "boainain", "boainain");
checkReuse(a, "boas", "boas");
checkReuse(a, "bôas", "boas"); // removes diacritic: different from snowball portugese
a.close();
}
public void testStemExclusionTable() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer(
CharArraySet.EMPTY_SET, new CharArraySet(asSet("quintessência"), false));
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
a.close();
}
public void testWithKeywordAttribute() throws IOException {
@ -154,7 +156,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new BrazilianAnalyzer(), input, expected);
BrazilianAnalyzer a = new BrazilianAnalyzer();
checkOneTerm(a, input, expected);
a.close();
}
private void checkReuse(Analyzer a, String input, String expected) throws Exception {
@ -163,7 +167,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new BrazilianAnalyzer(), 1000*RANDOM_MULTIPLIER);
BrazilianAnalyzer a = new BrazilianAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -175,5 +181,6 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -27,7 +27,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new CatalanAnalyzer();
new CatalanAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "llengua", "llengu");
// stopword
assertAnalyzesTo(a, "un", new String[] { });
a.close();
}
/** test use of elisionfilter */
@ -45,6 +46,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new CatalanAnalyzer();
assertAnalyzesTo(a, "Diccionari de l'Institut d'Estudis Catalans",
new String[] { "diccion", "inst", "estud", "catalan" });
a.close();
}
/** test use of exclusion set */
@ -53,10 +55,13 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "llengües", "llengües");
checkOneTerm(a, "llengua", "llengu");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new CatalanAnalyzer(), 1000*RANDOM_MULTIPLIER);
CatalanAnalyzer a = new CatalanAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

View File

@ -25,7 +25,6 @@ import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@ -403,16 +402,22 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
public void testRandom() throws Exception {
int numRounds = RANDOM_MULTIPLIER * 1000;
checkRandomData(random(), newTestAnalyzer(), numRounds);
Analyzer a = newTestAnalyzer();
checkRandomData(random(), a, numRounds);
a.close();
}
public void testRandomHugeStrings() throws Exception {
int numRounds = RANDOM_MULTIPLIER * 100;
checkRandomData(random(), newTestAnalyzer(), numRounds, 8192);
Analyzer a = newTestAnalyzer();
checkRandomData(random(), a, numRounds, 8192);
a.close();
}
public void testCloseBR() throws Exception {
checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), " Secretary)</br> [[M");
Analyzer a = newTestAnalyzer();
checkAnalysisConsistency(random(), a, random().nextBoolean(), " Secretary)</br> [[M");
a.close();
}
public void testServerSideIncludes() throws Exception {
@ -549,7 +554,9 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
public void testRandomBrokenHTML() throws Exception {
int maxNumElements = 10000;
String text = TestUtil.randomHtmlishString(random(), maxNumElements);
checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), text);
Analyzer a = newTestAnalyzer();
checkAnalysisConsistency(random(), a, random().nextBoolean(), text);
a.close();
}
public void testRandomText() throws Exception {
@ -617,6 +624,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
assertAnalyzesTo(analyzer, " &#57209;", new String[] { "\uFFFD" } );
assertAnalyzesTo(analyzer, " &#57209", new String[] { "\uFFFD" } );
assertAnalyzesTo(analyzer, " &#57209<br>", new String[] { "&#57209" } );
analyzer.close();
}

View File

@ -216,6 +216,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
int numRounds = RANDOM_MULTIPLIER * 10000;
checkRandomData(random(), analyzer, numRounds);
analyzer.close();
}
//@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
@ -242,6 +243,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
String text = "gzw f quaxot";
checkAnalysisConsistency(random(), analyzer, false, text);
analyzer.close();
}
//@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
@ -263,6 +265,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
};
int numRounds = 100;
checkRandomData(random(), analyzer, numRounds);
analyzer.close();
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;
import java.io.IOException;
import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -39,7 +38,19 @@ import org.apache.lucene.analysis.util.CharArraySet;
* Most tests adopted from TestCJKTokenizer
*/
public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
private Analyzer analyzer = new CJKAnalyzer();
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new CJKAnalyzer();
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
public void testJa1() throws IOException {
assertAnalyzesTo(analyzer, "一二三四五六七八九十",
@ -228,6 +239,8 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
// before bigramming, the 4 tokens look like:
// { 0, 0, 1, 1 },
// { 0, 1, 1, 2 }
analyzer.close();
}
private static class FakeStandardTokenizer extends TokenFilter {
@ -267,17 +280,21 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
new int[] { 1 },
new String[] { "<SINGLE>" },
new int[] { 1 });
analyzer.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new CJKAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new CJKAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
Random random = random();
checkRandomData(random, new CJKAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
Analyzer a = new CJKAnalyzer();
checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -289,5 +306,6 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -17,31 +17,42 @@ package org.apache.lucene.analysis.cjk;
* limitations under the License.
*/
import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;
public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t, new CJKBigramFilter(t));
}
};
Analyzer analyzer, unibiAnalyzer;
Analyzer unibiAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, true));
}
};
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t, new CJKBigramFilter(t));
}
};
unibiAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, true));
}
};
}
@Override
public void tearDown() throws Exception {
IOUtils.close(analyzer, unibiAnalyzer);
super.tearDown();
}
public void testHuge() throws Exception {
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@ -79,6 +90,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
a.close();
}
public void testAllScripts() throws Exception {
@ -92,6 +104,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
a.close();
}
public void testUnigramsAndBigramsAllScripts() throws Exception {
@ -132,6 +145,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
a.close();
}
public void testUnigramsAndBigramsHuge() throws Exception {

View File

@ -29,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Tests for {@link CJKWidthFilter}
*/
public class TestCJKWidthFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new CJKWidthFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new CJKWidthFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/**
* Full-width ASCII forms normalized to half-width (basic latin)
@ -74,5 +86,6 @@ public class TestCJKWidthFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -32,24 +32,27 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new SoraniAnalyzer();
new SoraniAnalyzer().close();
}
public void testStopwords() throws IOException {
Analyzer a = new SoraniAnalyzer();
assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"پیاو"});
a.close();
}
public void testCustomStopwords() throws IOException {
Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET);
assertAnalyzesTo(a, "ئەم پیاوە",
new String[] {"ئەم", "پیاو"});
a.close();
}
public void testReusableTokenStream() throws IOException {
Analyzer a = new SoraniAnalyzer();
assertAnalyzesTo(a, "پیاوە", new String[] {"پیاو"});
assertAnalyzesTo(a, "پیاو", new String[] {"پیاو"});
a.close();
}
public void testWithStemExclusionSet() throws IOException {
@ -57,10 +60,13 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase {
set.add("پیاوە");
Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "پیاوە", new String[] { "پیاوە" });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new SoraniAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new SoraniAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ckb;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -30,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Tests normalization for Sorani (this is more critical than stemming...)
*/
public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer));
}
};
Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer));
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
public void testY() throws Exception {
checkOneTerm(a, "\u064A", "\u06CC");
@ -96,5 +107,6 @@ public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ckb;
import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -31,7 +30,19 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Test the Sorani Stemmer.
*/
public class TestSoraniStemFilter extends BaseTokenStreamTestCase {
SoraniAnalyzer a = new SoraniAnalyzer();
Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new SoraniAnalyzer();
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
public void testIndefiniteSingular() throws Exception {
checkOneTerm(a, "پیاوێک", "پیاو"); // -ek
@ -90,6 +101,7 @@ public class TestSoraniStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
/** test against a basic vocabulary file */

View File

@ -156,6 +156,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
new String[] { "s_s", "s_s" });
assertAnalyzesTo(a, "of the of",
new String[] { "of_the", "the_of" });
a.close();
}
public void testCommonGramsFilter() throws Exception {
@ -242,6 +243,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "of the of",
new String[] { "of", "of_the", "the", "the_of", "of" },
new int[] { 1, 0, 1, 0, 1 });
a.close();
}
/**
@ -330,6 +332,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
Analyzer b = new Analyzer() {
@ -342,5 +345,6 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
};
checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
b.close();
}
}

View File

@ -336,6 +336,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
new String[] { "bankueberfall", "fall" },
new int[] { 0, 0 },
new int[] { 12, 12 });
analyzer.close();
}
/** blast some random strings through the analyzer */
@ -350,6 +351,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
@ -363,6 +365,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
b.close();
}
public void testEmptyTerm() throws Exception {
@ -376,6 +379,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
@ -389,5 +393,6 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(b, "", "");
b.close();
}
}

View File

@ -18,15 +18,18 @@ package org.apache.lucene.analysis.core;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Random;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
public class TestAnalyzers extends BaseTokenStreamTestCase {
@ -48,6 +51,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
new String[] { "b" });
assertAnalyzesTo(a, "\"QUOTED\" word",
new String[] { "quoted", "word" });
a.close();
}
public void testNull() throws Exception {
@ -68,6 +72,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
new String[] { "2B" });
assertAnalyzesTo(a, "\"QUOTED\" word",
new String[] { "\"QUOTED\"", "word" });
a.close();
}
public void testStop() throws Exception {
@ -76,6 +81,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
new String[] { "foo", "bar", "foo", "bar" });
assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
new String[] { "foo", "bar", "foo", "bar" });
a.close();
}
void verifyPayload(TokenStream ts) throws IOException {
@ -159,6 +165,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
// unpaired trail surrogate
assertAnalyzesTo(a, "AbaC\uDC16AdaBa",
new String [] { "abac\uDC16adaba" });
a.close();
}
/**
@ -179,9 +186,9 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
// unpaired trail surrogate
assertAnalyzesTo(a, "AbaC\uDC16AdaBa",
new String [] { "ABAC\uDC16ADABA" });
a.close();
}
/**
* Test that LowercaseFilter handles the lowercasing correctly if the term
* buffer has a trailing surrogate character leftover and the current term in
@ -223,17 +230,20 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new WhitespaceAnalyzer(), 1000*RANDOM_MULTIPLIER);
checkRandomData(random(), new SimpleAnalyzer(), 1000*RANDOM_MULTIPLIER);
checkRandomData(random(), new StopAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer() };
for (Analyzer analyzer : analyzers) {
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
}
IOUtils.close(analyzers);
}
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
Random random = random();
checkRandomData(random, new WhitespaceAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
checkRandomData(random, new SimpleAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
checkRandomData(random, new StopAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer() };
for (Analyzer analyzer : analyzers) {
checkRandomData(random(), analyzer, 100*RANDOM_MULTIPLIER, 8192);
}
IOUtils.close(analyzers);
}
}

View File

@ -75,6 +75,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
}
};
checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
a.close();
}
CharFilter wrappedStream = new CharFilter(new StringReader("bogus")) {
@ -261,6 +262,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), analyzer, 2000);
analyzer.close();
}
public void testCuriousWikipediaString() throws Exception {
@ -285,5 +287,6 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
}
};
checkAnalysisConsistency(random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
a.close();
}
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@ -78,6 +79,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
// not so useful since it's all one token?!
@ -99,6 +101,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
public void testLetterHtmlish() throws Exception {
@ -116,6 +119,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
public void testLetterHtmlishHuge() throws Exception {
@ -136,6 +140,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
public void testLetterUnicode() throws Exception {
@ -153,6 +158,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
public void testLetterUnicodeHuge() throws Exception {
@ -173,6 +179,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
assertEquals(s, left.tokenStream("foo", newStringReader(s)),
right.tokenStream("foo", newStringReader(s)));
}
IOUtils.close(left, right);
}
// we only check a few core attributes here.

View File

@ -46,6 +46,8 @@ import org.apache.lucene.util.Version;
// TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories
// to an integration test module that sucks in all analysis modules.
// currently the only way to do this is via eclipse etc (LUCENE-3974)
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
public class TestFactories extends BaseTokenStreamTestCase {
public void test() throws IOException {
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
@ -77,7 +79,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 20, 20, false, false);
Analyzer a = new FactoryAnalyzer(factory, null, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
@ -97,7 +101,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 20, 20, false, false);
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
@ -117,7 +123,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 20, 20, false, false);
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -33,23 +34,24 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
private Directory directory;
private IndexSearcher searcher;
private IndexReader reader;
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new SimpleAnalyzer()));
analyzer = new SimpleAnalyzer();
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
Document doc = new Document();
doc.add(new StringField("partnum", "Q36", Field.Store.YES));
@ -59,13 +61,11 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
writer.close();
reader = DirectoryReader.open(directory);
searcher = newSearcher(reader);
}
@Override
public void tearDown() throws Exception {
reader.close();
directory.close();
IOUtils.close(analyzer, reader, directory);
super.tearDown();
}
@ -86,7 +86,8 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
public void testMutipleDocument() throws Exception {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
Analyzer analyzer = new KeywordAnalyzer();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
Document doc = new Document();
doc.add(new TextField("partnum", "Q36", Field.Store.YES));
writer.addDocument(doc);
@ -112,11 +113,13 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
null,
0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
analyzer.close();
}
// LUCENE-1441
public void testOffsets() throws Exception {
try (TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"))) {
try (Analyzer analyzer = new KeywordAnalyzer();
TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd"))) {
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
assertTrue(stream.incrementToken());
@ -129,6 +132,8 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new KeywordAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new KeywordAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

View File

@ -901,16 +901,17 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
Random random = random();
for (int i = 0; i < numIterations; i++) {
MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
if (VERBOSE) {
System.out.println("Creating random analyzer:" + a);
}
try {
checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
false /* We already validate our own offsets... */);
} catch (Throwable e) {
System.err.println("Exception from random analyzer: " + a);
throw e;
try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
if (VERBOSE) {
System.out.println("Creating random analyzer:" + a);
}
try {
checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
false /* We already validate our own offsets... */);
} catch (Throwable e) {
System.err.println("Exception from random analyzer: " + a);
throw e;
}
}
}
}
@ -920,16 +921,17 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
Random random = random();
for (int i = 0; i < numIterations; i++) {
MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
if (VERBOSE) {
System.out.println("Creating random analyzer:" + a);
}
try {
checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 80, false,
false /* We already validate our own offsets... */);
} catch (Throwable e) {
System.err.println("Exception from random analyzer: " + a);
throw e;
try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
if (VERBOSE) {
System.out.println("Creating random analyzer:" + a);
}
try {
checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 80, false,
false /* We already validate our own offsets... */);
} catch (Throwable e) {
System.err.println("Exception from random analyzer: " + a);
throw e;
}
}
}
}

View File

@ -30,7 +30,7 @@ import java.util.HashSet;
public class TestStopAnalyzer extends BaseTokenStreamTestCase {
private StopAnalyzer stop = new StopAnalyzer();
private StopAnalyzer stop;
private Set<Object> inValidTokens = new HashSet<>();
@Override
@ -41,6 +41,13 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
while(it.hasNext()) {
inValidTokens.add(it.next());
}
stop = new StopAnalyzer();
}
@Override
public void tearDown() throws Exception {
stop.close();
super.tearDown();
}
public void testDefaults() throws IOException {
@ -71,6 +78,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
}
stream.end();
}
newStop.close();
}
public void testStopListPositions() throws IOException {
@ -92,6 +100,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
}
stream.end();
}
newStop.close();
}
}

View File

@ -62,6 +62,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "föó bär FÖÖ BAR",
new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
new int[] { 1, 0, 1, 0, 1, 0, 1});
a.close();
}
public void testHtmlStripClassicFolding() throws Exception {
@ -93,6 +94,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "<p><b>föó</b> bär FÖÖ BAR</p>",
new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
new int[] { 1, 0, 1, 0, 1, 0, 1});
a.close();
}
public void testStopWordsFromClasspath() throws Exception {
@ -114,6 +116,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(Version.LATEST, a.getVersion());
assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
a.close();
}
public void testStopWordsFromClasspathWithMap() throws Exception {
@ -141,6 +144,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
} catch (IllegalArgumentException | UnsupportedOperationException e) {
// pass
}
a.close();
}
public void testStopWordsFromFile() throws Exception {
@ -152,6 +156,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
"format", "wordset")
.build();
assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
a.close();
}
public void testStopWordsFromFileAbsolute() throws Exception {
@ -163,6 +168,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
"format", "wordset")
.build();
assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
a.close();
}
// Now test misconfigurations:

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cz;
*/
import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharArraySet;
@ -31,15 +32,24 @@ import org.apache.lucene.analysis.util.CharArraySet;
*/
public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new CzechAnalyzer().close();
}
public void testStopWord() throws Exception {
assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem",
Analyzer analyzer = new CzechAnalyzer();
assertAnalyzesTo(analyzer, "Pokud mluvime o volnem",
new String[] { "mluvim", "voln" });
analyzer.close();
}
public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CzechAnalyzer();
assertAnalyzesTo(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
assertAnalyzesTo(analyzer, "Česká Republika", new String[] { "česk", "republik" });
analyzer.close();
}
public void testWithStemExclusionSet() throws IOException{
@ -47,10 +57,13 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
set.add("hole");
CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
cz.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new CzechAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new CzechAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cz;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
@ -103,6 +102,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(cz, "soudcům", new String[] { "soudk" });
assertAnalyzesTo(cz, "soudcích", new String[] { "soudk" });
assertAnalyzesTo(cz, "soudcem", new String[] { "soudk" });
cz.close();
}
/**
@ -147,6 +148,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(cz, "ženách", new String[] { "žn" });
assertAnalyzesTo(cz, "ženou", new String[] { "žn" });
assertAnalyzesTo(cz, "ženami", new String[] { "žn" });
cz.close();
}
/**
@ -189,7 +192,9 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(cz, "stavení", new String[] { "stavn" });
assertAnalyzesTo(cz, "stavením", new String[] { "stavn" });
assertAnalyzesTo(cz, "staveních", new String[] { "stavn" });
assertAnalyzesTo(cz, "staveními", new String[] { "stavn" });
assertAnalyzesTo(cz, "staveními", new String[] { "stavn" });
cz.close();
}
/**
@ -218,6 +223,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
assertAnalyzesTo(cz, "jarnímu", new String[] { "jarn" });
assertAnalyzesTo(cz, "jarním", new String[] { "jarn" });
assertAnalyzesTo(cz, "jarními", new String[] { "jarn" });
cz.close();
}
/**
@ -227,6 +234,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
CzechAnalyzer cz = new CzechAnalyzer();
assertAnalyzesTo(cz, "Karlův", new String[] { "karl" });
assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" });
cz.close();
}
/**
@ -267,6 +275,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
/* rewrite of e* -> * */
assertAnalyzesTo(cz, "deska", new String[] { "desk" });
assertAnalyzesTo(cz, "desek", new String[] { "desk" });
cz.close();
}
/**
@ -276,6 +286,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
CzechAnalyzer cz = new CzechAnalyzer();
assertAnalyzesTo(cz, "e", new String[] { "e" });
assertAnalyzesTo(cz, "zi", new String[] { "zi" });
cz.close();
}
public void testWithKeywordAttribute() throws IOException {
@ -297,6 +308,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -27,7 +27,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new DanishAnalyzer();
new DanishAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "undersøgelse", "undersøg");
// stopword
assertAnalyzesTo(a, "", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
DanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "undersøgelse", "undersøgelse");
checkOneTerm(a, "undersøg", "undersøg");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new DanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new DanishAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

View File

@ -32,6 +32,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "Tisch", "tisch");
checkOneTerm(a, "Tische", "tisch");
checkOneTerm(a, "Tischen", "tisch");
a.close();
}
public void testWithKeywordAttribute() throws IOException {
@ -48,6 +49,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
GermanAnalyzer a = new GermanAnalyzer( CharArraySet.EMPTY_SET,
new CharArraySet( asSet("tischen"), false));
checkOneTerm(a, "tischen", "tischen");
a.close();
}
/** test some features of the new snowball filter
@ -58,10 +60,13 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
// a/o/u + e is equivalent to the umlaut form
checkOneTerm(a, "Schaltflächen", "schaltflach");
checkOneTerm(a, "Schaltflaechen", "schaltflach");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new GermanAnalyzer(), 1000*RANDOM_MULTIPLIER);
GermanAnalyzer a = new GermanAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link GermanLightStemFilter}
*/
public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
@ -59,6 +70,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "sängerinnen", "sängerinnen");
a.close();
}
/** blast some random strings through the analyzer */
@ -75,5 +87,6 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link GermanMinimalStemFilter}
*/
public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test some examples from the paper */
public void testExamples() throws IOException {
@ -66,6 +77,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "sängerinnen", "sängerinnen");
a.close();
}
/** Test against a vocabulary from the reference impl */
@ -87,5 +99,6 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -31,14 +30,26 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Tests {@link GermanNormalizationFilter}
*/
public class TestGermanNormalizationFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new GermanNormalizationFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new GermanNormalizationFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/**
* Tests that a/o/u + e is equivalent to the umlaut form
@ -76,5 +87,6 @@ public class TestGermanNormalizationFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
@ -39,14 +40,26 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t,
new GermanStemFilter(new LowerCaseFilter(t)));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t,
new GermanStemFilter(new LowerCaseFilter(t)));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt");
@ -65,6 +78,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "sängerinnen", "sängerinnen");
a.close();
}
/** blast some random strings through the analyzer */
@ -81,5 +95,6 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -45,6 +45,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
// as well as the elimination of stop words
assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
a.close();
}
public void testReusableTokenStream() throws Exception {
@ -62,10 +63,13 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
// as well as the elimination of stop words
assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new GreekAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new GreekAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -26,7 +25,19 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
public class TestGreekStemmer extends BaseTokenStreamTestCase {
Analyzer a = new GreekAnalyzer();
private Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new GreekAnalyzer();
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
public void testMasculineNouns() throws Exception {
// -ος
@ -537,5 +548,6 @@ public class TestGreekStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new EnglishAnalyzer();
new EnglishAnalyzer().close();
}
/** test stopwords and stemming */
@ -42,6 +42,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "steven's", "steven");
checkOneTerm(a, "steven\u2019s", "steven");
checkOneTerm(a, "steven\uFF07s", "steven");
a.close();
}
/** test use of exclusion set */
@ -51,10 +52,13 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "books", "books");
checkOneTerm(a, "book", "book");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new EnglishAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new EnglishAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.en;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -30,14 +29,26 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Simple tests for {@link EnglishMinimalStemFilter}
*/
public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test some examples from various papers about this technique */
public void testExamples() throws IOException {
checkOneTerm(analyzer, "queries", "query");
@ -65,5 +76,6 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.en;
import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -32,13 +31,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Tests for {@link KStemmer}
*/
public class TestKStemmer extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
}
};
private Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
@ -63,6 +74,7 @@ public class TestKStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
/****** requires original java kstem source code to create map

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.en;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -36,13 +35,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Test the PorterStemFilter with Martin Porter's test data.
*/
public class TestPorterStemFilter extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer( MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t, new PorterStemFilter(t));
}
};
private Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer( MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t, new PorterStemFilter(t));
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
/**
* Run the stemmer against all strings in voc.txt
@ -75,5 +86,6 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new SpanishAnalyzer();
new SpanishAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "chicano", "chican");
// stopword
assertAnalyzesTo(a, "los", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "chicana", "chican");
checkOneTerm(a, "chicano", "chicano");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new SpanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new SpanishAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.es;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -32,14 +31,26 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link SpanishLightStemFilter}
*/
public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataPath("eslighttestdata.zip"), "eslight.txt");
@ -59,5 +70,6 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new BasqueAnalyzer();
new BasqueAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "zaldiak", "zaldi");
// stopword
assertAnalyzesTo(a, "izan", new String[] { });
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "zaldiak", "zaldiak");
checkOneTerm(a, "mendiari", "mendi");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new BasqueAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new BasqueAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -31,7 +31,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new PersianAnalyzer();
new PersianAnalyzer().close();
}
/**
@ -105,6 +105,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
// active present subjunctive
assertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
a.close();
}
/**
@ -181,6 +182,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
// active present subjunctive
assertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
a.close();
}
/**
@ -192,6 +194,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new PersianAnalyzer();
assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
a.close();
}
/**
@ -201,6 +204,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
public void testBehaviorNonPersian() throws Exception {
Analyzer a = new PersianAnalyzer();
assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
a.close();
}
/**
@ -210,6 +214,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new PersianAnalyzer();
assertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
a.close();
}
/**
@ -220,10 +225,13 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
new CharArraySet( asSet("the", "and", "a"), false));
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new PersianAnalyzer(), 1000*RANDOM_MULTIPLIER);
PersianAnalyzer a = new PersianAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -24,17 +24,29 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
public class TestPersianCharFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
@Override
protected Reader initReader(String fieldName, Reader reader) {
return new PersianCharFilter(reader);
}
};
@Override
protected Reader initReader(String fieldName, Reader reader) {
return new PersianCharFilter(reader);
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
public void testBasics() throws Exception {
assertAnalyzesTo(analyzer, "this is a\u200Ctest",

@ -18,8 +18,6 @@ package org.apache.lucene.analysis.fa;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -73,6 +71,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new FinnishAnalyzer();
new FinnishAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "edeltäjistään", "edeltäj");
// stopword
assertAnalyzesTo(a, "olla", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "edeltäjiinsä", "edeltäj");
checkOneTerm(a, "edeltäjistään", "edeltäjistään");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new FinnishAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new FinnishAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -34,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FinnishLightStemFilter}
*/
public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
@ -58,6 +70,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "edeltäjistään", "edeltäjistään");
a.close();
}
/** blast some random strings through the analyzer */
@ -74,5 +87,6 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -110,7 +110,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
fa,
"33Bis 1940-1945 1940:1945 (---i+++)*",
new String[] { "33bi", "1940", "1945", "1940", "1945", "i" });
fa.close();
}
public void testReusableTokenStream() throws Exception {
@ -132,6 +132,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
"chist",
"element",
"captif" });
fa.close();
}
public void testExclusionTableViaCtor() throws Exception {
@ -141,15 +142,18 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
fa.close();
fa = new FrenchAnalyzer( CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
fa.close();
}
public void testElision() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer();
assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouil" });
fa.close();
}
/**
@ -158,11 +162,14 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
public void testStopwordsCasing() throws IOException {
FrenchAnalyzer a = new FrenchAnalyzer();
assertAnalyzesTo(a, "Votre", new String[] { });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new FrenchAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new FrenchAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
/** test accent-insensitive */
@ -170,5 +177,6 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new FrenchAnalyzer();
checkOneTerm(a, "sécuritaires", "securitair");
checkOneTerm(a, "securitaires", "securitair");
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.fr;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FrenchLightStemFilter}
*/
public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer( MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer( MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test some examples from the paper */
public void testExamples() throws IOException {
@ -189,6 +200,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "chevaux", "chevaux");
a.close();
}
/** blast some random strings through the analyzer */
@ -205,5 +217,6 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.fr;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FrenchMinimalStemFilter}
*/
public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test some examples from the paper */
public void testExamples() throws IOException {
@ -68,6 +79,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "chevaux", "chevaux");
a.close();
}
/** Test against a vocabulary from the reference impl */
@ -89,5 +101,6 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new IrishAnalyzer();
new IrishAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "síceapatacha", "síceapaite");
// stopword
assertAnalyzesTo(a, "le", new String[] { });
a.close();
}
/** test use of elisionfilter */
@ -45,6 +46,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new IrishAnalyzer();
assertAnalyzesTo(a, "b'fhearr m'athair",
new String[] { "fearr", "athair" });
a.close();
}
/** test use of exclusion set */
@ -54,6 +56,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
IrishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "feirmeoireacht", "feirmeoireacht");
checkOneTerm(a, "siopadóireacht", "siopadóir");
a.close();
}
/** test special hyphen handling */
@ -62,10 +65,13 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(a, "n-athair",
new String[] { "athair" },
new int[] { 2 });
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new IrishAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer a = new IrishAnalyzer();
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -48,5 +48,6 @@ public class TestIrishLowerCaseFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new GalicianAnalyzer();
new GalicianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "corresponderá", "correspond");
// stopword
assertAnalyzesTo(a, "e", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "correspondente", "correspondente");
checkOneTerm(a, "corresponderá", "correspond");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new GalicianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new GalicianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.gl;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -33,13 +32,25 @@ import org.apache.lucene.analysis.util.CharArraySet;
* Simple tests for {@link GalicianMinimalStemmer}
*/
public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
}
};
private Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
public void testPlural() throws Exception {
checkOneTerm(a, "elefantes", "elefante");
@ -64,6 +75,7 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "elefantes", "elefantes");
a.close();
}
/** blast some random strings through the analyzer */
@ -80,5 +92,6 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -20,29 +20,36 @@ package org.apache.lucene.analysis.gl;
import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Simple tests for {@link GalicianStemFilter}
*/
public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GalicianStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GalicianStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
@ -58,5 +65,6 @@ public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -28,7 +28,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HindiAnalyzer();
new HindiAnalyzer().close();
}
public void testBasics() throws Exception {
@ -36,6 +36,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
// two ways to write 'hindi' itself.
checkOneTerm(a, "हिन्दी", "हिंद");
checkOneTerm(a, "हिंदी", "हिंद");
a.close();
}
public void testExclusionSet() throws Exception {
@ -43,10 +44,13 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new HindiAnalyzer(
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "हिंदी", "हिंदी");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new HindiAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new HindiAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -76,5 +76,6 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -95,5 +95,6 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new HungarianAnalyzer();
new HungarianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "babakocsijáért", "babakocs");
// stopword
assertAnalyzesTo(a, "által", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "babakocsi", "babakocsi");
checkOneTerm(a, "babakocsijáért", "babakocs");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new HungarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new HungarianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -34,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link HungarianLightStemFilter}
*/
public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
@ -58,6 +70,7 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "babakocsi", "babakocsi");
a.close();
}
public void testEmptyTerm() throws IOException {
@ -69,5 +82,6 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -87,6 +87,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
public void testEmptyTerm() throws IOException {
@ -98,6 +99,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
public void testIgnoreCaseNoSideEffects() throws Exception {
@ -118,5 +120,6 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "NoChAnGy", "NoChAnGy");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ArmenianAnalyzer();
new ArmenianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "արծիվներ", "արծ");
// stopword
assertAnalyzesTo(a, "է", new String[] { });
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "արծիվներ", "արծիվներ");
checkOneTerm(a, "արծիվ", "արծ");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new ArmenianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new ArmenianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -27,7 +27,7 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new IndonesianAnalyzer();
new IndonesianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "pembunuhan", "bunuh");
// stopword
assertAnalyzesTo(a, "bahwa", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "peledakan", "peledakan");
checkOneTerm(a, "pembunuhan", "bunuh");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new IndonesianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new IndonesianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -18,26 +18,46 @@ package org.apache.lucene.analysis.id;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.IOUtils;
/**
* Tests {@link IndonesianStemmer}
*/
public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
/* full stemming, no stopwords */
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
}
};
private Analyzer a, b;
@Override
public void setUp() throws Exception {
super.setUp();
/* full stemming, no stopwords */
a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
}
};
/* inflectional-only stemming */
b = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false));
}
};
}
@Override
public void tearDown() throws Exception {
IOUtils.close(a, b);
super.tearDown();
}
/** Some examples from the paper */
public void testExamples() throws IOException {
@ -111,15 +131,6 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
checkOneTerm(a, "kecelakaan", "celaka");
}
/* inflectional-only stemming */
Analyzer b = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false));
}
};
/** Test stemming only inflectional suffixes */
public void testInflectionalOnly() throws IOException {
checkOneTerm(b, "bukunya", "buku");
@ -143,5 +154,6 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}
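
This file closes its two analyzers through org.apache.lucene.util.IOUtils rather than two separate close() calls. A rough sketch of that helper follows; the class name IOUtilsCloseSketch and the particular analyzers are chosen only for illustration.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.IOUtils;

public class IOUtilsCloseSketch {
  public static void main(String[] args) throws Exception {
    Analyzer a = new KeywordAnalyzer();
    Analyzer b = new WhitespaceAnalyzer();
    // IOUtils.close(Closeable...) tries to close every argument (nulls are skipped)
    // and rethrows the first exception it saw once all close() calls have run.
    IOUtils.close(a, b);
  }
}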

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.in;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
@ -62,5 +61,6 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ItalianAnalyzer();
new ItalianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "abbandonati", "abbandonat");
// stopword
assertAnalyzesTo(a, "dallo", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,11 +48,14 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "abbandonata", "abbandonata");
checkOneTerm(a, "abbandonati", "abbandonat");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new ItalianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new ItalianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
/** test that the elisionfilter is working */
@ -59,5 +63,6 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
Analyzer a = new ItalianAnalyzer();
assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
assertAnalyzesTo(a, "l'Italiano", new String[] { "italian" });
a.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.it;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -32,13 +31,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link ItalianLightStemFilter}
*/
public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary from the reference impl */
public void testVocabulary() throws IOException {
@ -59,5 +70,6 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -27,7 +27,7 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new LatvianAnalyzer();
new LatvianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "tirgus", "tirg");
// stopword
assertAnalyzesTo(a, "un", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "tirgiem", "tirgiem");
checkOneTerm(a, "tirgus", "tirg");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new LatvianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new LatvianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.lv;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -30,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
* Basic tests for {@link LatvianStemmer}
*/
public class TestLatvianStemmer extends BaseTokenStreamTestCase {
private Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
}
};
private Analyzer a;
@Override
public void setUp() throws Exception {
super.setUp();
a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
}
};
}
@Override
public void tearDown() throws Exception {
a.close();
super.tearDown();
}
public void testNouns1() throws IOException {
// decl. I
@ -279,5 +290,6 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -1933,6 +1933,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -1945,5 +1946,6 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -137,6 +137,7 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -148,6 +149,7 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
/**

@ -47,6 +47,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
public void testRandomStrings() throws IOException {

@ -18,7 +18,6 @@
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
@ -78,6 +77,7 @@ public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -89,5 +89,6 @@ public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -66,5 +66,6 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
}

@ -18,12 +18,9 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
@ -49,6 +46,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
/**

@ -52,6 +52,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
// equal to limit
assertTokenStreamContents(a.tokenStream("dummy", "1 2 "), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null);
a.close();
}
}
@ -86,6 +87,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
assertEquals(0, reader.docFreq(t));
reader.close();
dir.close();
a.close();
}
}

@ -56,6 +56,7 @@ public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase {
// equal to limit
assertTokenStreamContents(a.tokenStream("dummy", "1 2 "),
new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
a.close();
}
}

@ -15,6 +15,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Rethrow;
/*
@ -40,9 +41,11 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
Map<String,Analyzer> analyzerPerField =
Collections.<String,Analyzer>singletonMap("special", new SimpleAnalyzer());
Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
PerFieldAnalyzerWrapper analyzer =
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField);
try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
@ -67,6 +70,10 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
assertFalse(tokenStream.incrementToken());
tokenStream.end();
}
// TODO: fix this about PFAW, this is crazy
analyzer.close();
defaultAnalyzer.close();
IOUtils.close(analyzerPerField.values());
}
public void testReuseWrapped() throws Exception {
@ -124,6 +131,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
ts4 = wrapper3.tokenStream("moreSpecial", text);
assertSame(ts3, ts4);
assertSame(ts2, ts3);
IOUtils.close(wrapper3, wrapper2, wrapper1, specialAnalyzer, defaultAnalyzer);
}
public void testCharFilters() throws Exception {
@ -152,5 +160,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
new int[] { 0 },
new int[] { 2 }
);
p.close();
a.close(); // TODO: fix this about PFAW, it's a trap
}
}
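
The TODO comments in this file point at the awkward part of the change: the test closes the PerFieldAnalyzerWrapper and then closes the default analyzer and every per-field analyzer separately, which suggests the wrapper's own close() does not reach its delegates. A small, hedged sketch of that bookkeeping, with invented field names and analyzers, assuming the same behavior:

import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.util.IOUtils;

public class PerFieldCloseSketch {
  public static void main(String[] args) throws Exception {
    Map<String, Analyzer> perField =
        Collections.<String, Analyzer>singletonMap("special", new SimpleAnalyzer());
    Analyzer defaults = new WhitespaceAnalyzer();
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(defaults, perField);

    // ... run wrapper.tokenStream(...) as usual ...

    // Closing the wrapper alone is apparently not enough (see the TODOs above),
    // so each wrapped analyzer is closed explicitly as well.
    wrapper.close();
    defaults.close();
    IOUtils.close(perField.values());
  }
}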

@ -163,6 +163,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
};
checkRandomData(random(), analyzer, 200);
analyzer.close();
}
}
@ -175,6 +176,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

@ -24,19 +24,27 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.io.Reader;
public class TestScandinavianFoldingFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianFoldingFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianFoldingFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
public void test() throws Exception {
@ -117,6 +125,7 @@ public class TestScandinavianFoldingFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
/** blast some random strings through the analyzer */

@ -24,20 +24,27 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.io.Reader;
public class TestScandinavianNormalizationFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianNormalizationFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String field) {
final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianNormalizationFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
public void test() throws Exception {
@ -116,6 +123,7 @@ public class TestScandinavianNormalizationFilter extends BaseTokenStreamTestCase
}
};
checkOneTerm(a, "", "");
a.close();
}
/** blast some random strings through the analyzer */

@ -19,9 +19,10 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -31,6 +32,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.TestUtil;
/**
@ -79,7 +81,12 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
public void testRandomRealisticWhiteSpace() throws IOException {
Map<String,String> map = new HashMap<>();
Set<String> seen = new HashSet<>();
int numTerms = atLeast(50);
boolean ignoreCase = random().nextBoolean();
CharacterUtils charUtils = CharacterUtils.getInstance();
for (int i = 0; i < numTerms; i++) {
String randomRealisticUnicodeString = TestUtil
.randomRealisticUnicodeString(random());
@ -93,16 +100,31 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
j += Character.charCount(cp);
}
if (builder.length() > 0) {
String value = TestUtil.randomSimpleString(random());
map.put(builder.toString(),
value.isEmpty() ? "a" : value);
String inputValue = builder.toString();
// Make sure we don't try to add two inputs that vary only by case:
String seenInputValue;
if (ignoreCase) {
// TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)???
char[] buffer = inputValue.toCharArray();
charUtils.toLowerCase(buffer, 0, buffer.length);
seenInputValue = buffer.toString();
} else {
seenInputValue = inputValue;
}
if (seen.contains(seenInputValue) == false) {
seen.add(seenInputValue);
String value = TestUtil.randomSimpleString(random());
map.put(inputValue,
value.isEmpty() ? "a" : value);
}
}
}
if (map.isEmpty()) {
map.put("booked", "books");
}
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
Set<Entry<String,String>> entrySet = map.entrySet();
StringBuilder input = new StringBuilder();
List<String> output = new ArrayList<>();
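The new dedup block above keeps the Builder from receiving two inputs that differ only by case when ignoreCase is true. A compact standalone sketch of the same idea, using the Locale.ROOT lower-casing that the TODO in the hunk suggests; the sample inputs and the override value are illustrative:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

public class CaseInsensitiveDedupSketch {
  public static void main(String[] args) {
    boolean ignoreCase = true;
    Map<String, String> map = new HashMap<>();
    Set<String> seen = new HashSet<>();
    for (String input : new String[] {"Booked", "booked", "BOOKED", "books"}) {
      // The key is used only for duplicate detection; the original-cased input is what gets stored.
      String key = ignoreCase ? input.toLowerCase(Locale.ROOT) : input;
      if (seen.add(key)) {        // add() returns false for case-variant duplicates
        map.put(input, "book");   // illustrative override value
      }
    }
    System.out.println(map);      // only the first case variant of "booked" survives
  }
}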

View File

@ -18,7 +18,6 @@
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -103,6 +102,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
a.close();
}
public void testEmptyTerm() throws IOException {
@ -114,5 +114,6 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;
import java.io.IOException;
@ -292,6 +293,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 4, 4, 11 },
new int[] { 10, 15, 15 },
new int[] { 2, 0, 1 });
IOUtils.close(a, a2, a3);
}
/** concat numbers + words + all */
@ -312,6 +314,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 0, 0, 0, 4, 8, 8, 12 },
new int[] { 3, 7, 15, 7, 11, 15, 15 },
new int[] { 1, 0, 0, 1, 1, 0, 1 });
a.close();
}
/** concat numbers + words + all + preserve original */
@ -332,6 +335,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 0, 0, 0, 0, 4, 8, 8, 12 },
new int[] { 15, 3, 7, 15, 7, 11, 15, 15 },
new int[] { 1, 0, 0, 0, 1, 1, 0, 1 });
a.close();
}
/** blast some random strings through the analyzer */
@ -356,6 +360,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20, false, false);
a.close();
}
}
@ -381,6 +386,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 20*RANDOM_MULTIPLIER, 8192, false, false);
a.close();
}
}
@ -404,6 +410,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
};
// depending upon options, this thing may or may not preserve the empty term
checkAnalysisConsistency(random, a, random.nextBoolean(), "");
a.close();
}
}
}
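Where a test builds several analyzers, the commit closes them in one call via IOUtils.close(a, a2, a3) rather than one close() per analyzer. A minimal sketch of that idiom, using StandardAnalyzer instances purely for illustration:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.IOUtils;

public class CloseManySketch {
  public static void main(String[] args) throws Exception {
    Analyzer a = new StandardAnalyzer();
    Analyzer a2 = new StandardAnalyzer();
    Analyzer a3 = new StandardAnalyzer();
    // ... exercise the analyzers ...
    // IOUtils.close closes every argument and rethrows the first exception, if any.
    IOUtils.close(a, a2, a3);
  }
}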

View File

@ -35,7 +35,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
/**
* Tests {@link EdgeNGramTokenFilter} for correctness.
@ -183,6 +182,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), a, 100*RANDOM_MULTIPLIER);
a.close();
}
}
@ -197,6 +197,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
}
};
checkAnalysisConsistency(random, a, random.nextBoolean(), "");
a.close();
}
public void testGraphs() throws IOException {

View File

@ -113,6 +113,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 20);
checkRandomData(random(), a, 10*RANDOM_MULTIPLIER, 8192);
a.close();
}
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@ -140,6 +139,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
analyzer.close();
}
/** blast some random strings through the analyzer */
@ -156,6 +156,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
}
};
checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
a.close();
}
}
@ -170,6 +171,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
}
};
checkAnalysisConsistency(random, a, random.nextBoolean(), "");
a.close();
}
public void testSupplementaryCharacters() throws IOException {

View File

@ -123,6 +123,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase {
};
checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
checkRandomData(random(), a, 10*RANDOM_MULTIPLIER, 1027);
a.close();
}
}

View File

@ -117,6 +117,7 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "opheffen", "opheff");
checkOneTerm(a, "opheffende", "opheff");
checkOneTerm(a, "opheffing", "opheff");
a.close();
}
public void testReusableTokenStream() throws Exception {
@ -125,6 +126,7 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "lichamelijk", "licham");
checkOneTerm(a, "lichamelijke", "licham");
checkOneTerm(a, "lichamelijkheden", "licham");
a.close();
}
public void testExclusionTableViaCtor() throws IOException {
@ -132,10 +134,11 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
set.add("lichamelijk");
DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
a.close();
a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
a.close();
}
/**
@ -145,12 +148,14 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
public void testStemOverrides() throws IOException {
DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET);
checkOneTerm(a, "fiets", "fiets");
a.close();
}
public void testEmptyStemDictionary() throws IOException {
DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET,
CharArraySet.EMPTY_SET, CharArrayMap.<String>emptyMap());
checkOneTerm(a, "fiets", "fiet");
a.close();
}
/**
@ -159,15 +164,20 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
public void testStopwordsCasing() throws IOException {
DutchAnalyzer a = new DutchAnalyzer();
assertAnalyzesTo(a, "Zelf", new String[] { });
a.close();
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(), input, expected);
Analyzer analyzer = new DutchAnalyzer();
checkOneTerm(analyzer, input, expected);
analyzer.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new DutchAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new DutchAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}
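The analyzer tests above are rewritten from one-liners such as checkOneTerm(new DutchAnalyzer(), ...) into create-use-close. Because Analyzer is Closeable, the same discipline could also be expressed with try-with-resources; a minimal sketch under that assumption (the commit itself uses explicit close() calls):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;

public class CloseAnalyzerSketch {
  public static void main(String[] args) throws Exception {
    // try-with-resources closes the analyzer even if the body throws,
    // mirroring the explicit analyzer.close() calls added in the hunks above.
    try (Analyzer analyzer = new DutchAnalyzer()) {
      // a test would call e.g. checkOneTerm(analyzer, "fiets", "fiets") here
    }
  }
}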

View File

@ -27,7 +27,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new NorwegianAnalyzer();
new NorwegianAnalyzer().close();
}
/** test stopwords and stemming */
@ -38,6 +38,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "havnedistrikter", "havnedistrikt");
// stopword
assertAnalyzesTo(a, "det", new String[] {});
a.close();
}
/** test use of exclusion set */
@ -47,10 +48,13 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "havnedistriktene", "havnedistriktene");
checkOneTerm(a, "havnedistrikter", "havnedistrikt");
a.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random(), new NorwegianAnalyzer(), 1000*RANDOM_MULTIPLIER);
Analyzer analyzer = new NorwegianAnalyzer();
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
analyzer.close();
}
}

View File

@ -17,9 +17,7 @@ package org.apache.lucene.analysis.no;
* limitations under the License.
*/
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.util.Random;
@ -36,18 +34,29 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
/**
* Simple tests for {@link NorwegianLightStemFilter}
*/
public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, BOKMAAL));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, BOKMAAL));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a vocabulary file */
public void testVocabulary() throws IOException {
@ -64,6 +73,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
}
};
assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_light.txt")));
analyzer.close();
}
public void testKeyword() throws IOException {
@ -77,6 +87,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "sekretæren", "sekretæren");
a.close();
}
/** blast some random strings through the analyzer */
@ -94,5 +105,6 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -17,9 +17,7 @@ package org.apache.lucene.analysis.no;
* limitations under the License.
*/
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.util.Random;
@ -40,13 +38,25 @@ import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
* Simple tests for {@link NorwegianMinimalStemFilter}
*/
public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, BOKMAAL));
}
};
private Analyzer analyzer;
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, BOKMAAL));
}
};
}
@Override
public void tearDown() throws Exception {
analyzer.close();
super.tearDown();
}
/** Test against a Bokmål vocabulary file */
public void testVocabulary() throws IOException {
@ -63,6 +73,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_minimal.txt")));
analyzer.close();
}
public void testKeyword() throws IOException {
@ -76,6 +87,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "sekretæren", "sekretæren");
a.close();
}
/** blast some random strings through the analyzer */
@ -93,5 +105,6 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
}
};
checkOneTerm(a, "", "");
a.close();
}
}

View File

@ -227,6 +227,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
a.close();
}
/** blast some random large strings through the analyzer */
@ -241,5 +242,6 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false);
a.close();
}
}

View File

@ -17,14 +17,12 @@ package org.apache.lucene.analysis.path;
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_DELIMITER;
import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_SKIP;
@ -187,6 +185,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
a.close();
}
/** blast some random large strings through the analyzer */
@ -201,5 +200,6 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
};
// TODO: properly support positionLengthAttribute
checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false);
a.close();
}
}

View File

@ -16,7 +16,7 @@ package org.apache.lucene.analysis.pattern;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Pattern;
@ -606,6 +606,7 @@ public class TestPatternCaptureGroupTokenFilter extends BaseTokenStreamTestCase
};
checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
a.close();
}
private void testPatterns(String input, String[] regexes, String[] tokens,

Some files were not shown because too many files have changed in this diff.