mirror of https://github.com/apache/lucene.git

commit 05cf3fde0d

    LUCENE-6271: sync up with trunk

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1670257 13f79535-47bb-0310-9956-ffa450edef68
@@ -2,7 +2,7 @@
   <library name="JUnit">
     <CLASSES>
       <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
-      <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.9.jar!/" />
+      <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.12.jar!/" />
     </CLASSES>
     <JAVADOC />
     <SOURCES />

@@ -16,7 +16,7 @@
 import os
 import sys
 sys.path.append(os.path.dirname(__file__))
-import scriptutil
+from scriptutil import *
 
 import argparse
 import io
@@ -36,7 +36,7 @@ def update_changes(filename, new_version):
     buffer.append(line)
     return match is not None
 
-  changed = scriptutil.update_file(filename, matcher, edit)
+  changed = update_file(filename, matcher, edit)
   print('done' if changed else 'uptodate')
 
 def add_constant(new_version, deprecate):
@@ -90,7 +90,7 @@ def add_constant(new_version, deprecate):
       buffer.append(line)
       return False
 
-  changed = scriptutil.update_file(filename, matcher, Edit())
+  changed = update_file(filename, matcher, Edit())
   print('done' if changed else 'uptodate')
 
 version_prop_re = re.compile('version\.base=(.*)')
@@ -103,7 +103,7 @@ def update_build_version(new_version):
     buffer.append('version.base=' + new_version.dot + '\n')
     return True
 
-  changed = scriptutil.update_file(filename, version_prop_re, edit)
+  changed = update_file(filename, version_prop_re, edit)
   print('done' if changed else 'uptodate')
 
 def update_latest_constant(new_version):
@@ -116,7 +116,7 @@ def update_latest_constant(new_version):
     buffer.append(line.rpartition('=')[0] + ('= %s;\n' % new_version.constant))
     return True
 
-  changed = scriptutil.update_file(filename, matcher, edit)
+  changed = update_file(filename, matcher, edit)
   print('done' if changed else 'uptodate')
 
 def update_example_solrconfigs(new_version):
@@ -139,7 +139,7 @@ def update_solrconfig(filename, matcher, new_version):
     buffer.append(line.replace(match.group(1), new_version.dot))
     return True
 
-  changed = scriptutil.update_file(filename, matcher, edit)
+  changed = update_file(filename, matcher, edit)
   print('done' if changed else 'uptodate')
 
 def check_lucene_version_tests():
@@ -165,7 +165,7 @@ def read_config():
   parser.add_argument('-r', '--downstream-repo', help='Path to downstream checkout for given changeid')
   c = parser.parse_args()
 
-  c.branch_type = scriptutil.find_branch_type()
+  c.branch_type = find_branch_type()
   c.matching_branch = c.version.is_bugfix_release() and c.branch_type == 'release' or \
                       c.version.is_minor_release() and c.branch_type == 'stable' or \
                       c.branch_type == 'major'

@@ -48,8 +48,28 @@ New Features
 * LUCENE-6227: Added BooleanClause.Occur.FILTER to filter documents without
   participating in scoring (on the contrary to MUST). (Adrien Grand)
 
+* LUCENE-6294: Added oal.search.CollectorManager to allow for parallelization
+  of the document collection process on IndexSearcher. (Adrien Grand)
+
+* LUCENE-6303: Added filter caching baked into IndexSearcher, disabled by
+  default. (Adrien Grand)
+
+* LUCENE-6304: Added a new MatchNoDocsQuery that matches no documents.
+  (Lee Hinman via Adrien Grand)
+
+* LUCENE-6341: Add a -fast option to CheckIndex. (Robert Muir)
+
+* LUCENE-6355: IndexWriter's infoStream now also logs time to write FieldInfos
+  during merge (Lee Hinman via Mike McCandless)
+
+* LUCENE-6339: Added Near-real time Document Suggester via custom postings format
+  (Areek Zillur, Mike McCandless, Simon Willnauer)
+
 Bug Fixes
 
+* LUCENE-6368: FST.save can truncate output (BufferedOutputStream may be closed
+  after the underlying stream). (Ippei Matsushima via Dawid Weiss)
+
 * LUCENE-6249: StandardQueryParser doesn't support pure negative clauses.
   (Dawid Weiss)
 
@@ -59,6 +79,16 @@ Bug Fixes
 * LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when
   object alignment was different from 8. (Uwe Schindler, Adrien Grand)
 
+* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand)
+
+* LUCENE-6001: DrillSideways hits NullPointerException for certain
+  BooleanQuery searches. (Dragan Jotannovic, jane chang via Mike
+  McCandless)
+
+* LUCENE-6311: Fix NIOFSDirectory and SimpleFSDirectory so that the
+  toString method of IndexInputs confess when they are from a compound
+  file. (Robert Muir, Mike McCandless)
+
 Optimizations
 
 * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
@@ -84,10 +114,10 @@ Optimizations
 * LUCENE-6233 Speed up CheckIndex when the index has term vectors
   (Robert Muir, Mike McCandless)
 
-* LUCENE-6198: Added the TwoPhaseDocIdSetIterator API, exposed on scorers which
+* LUCENE-6198: Added the TwoPhaseIterator API, exposed on scorers which
   is for now only used on phrase queries and conjunctions in order to check
   positions lazily if the phrase query is in a conjunction with other queries.
-  (Robert Muir, Adrien Grand)
+  (Robert Muir, Adrien Grand, David Smiley)
 
 * LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
   minShouldMatch > 1 now either propagate or take advantage of the two-phase
@@ -108,6 +138,19 @@ Optimizations
   in order to advance doc IDs, which takes advantage of the cost() API.
   (Adrien Grand)
 
+* LUCENE-6290: QueryWrapperFilter propagates approximations and FilteredQuery
+  rewrites to a BooleanQuery when the filter is a QueryWrapperFilter in order
+  to leverage approximations. (Adrien Grand)
+
+* LUCENE-6318: Reduce RAM usage of FieldInfos when there are many fields.
+  (Mike McCandless, Robert Muir)
+
+* LUCENE-6320: Speed up CheckIndex. (Robert Muir)
+
+* LUCENE-4942: Optimized the encoding of PrefixTreeStrategy indexes for
+  non-point data: 33% smaller index, 68% faster indexing, and 44% faster
+  searching. YMMV (David Smiley)
+
 API Changes
 
 * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
@@ -146,6 +189,9 @@ API Changes
 * LUCENE-6268: Replace FieldValueFilter and DocValuesRangeFilter with equivalent
   queries that support approximations. (Adrien Grand)
 
+* LUCENE-6289: Replace DocValuesRangeFilter with DocValuesRangeQuery which
+  supports approximations. (Adrien Grand)
+
 * LUCENE-6266: Remove unnecessary Directory params from SegmentInfo.toString,
   SegmentInfos.files/toString, and SegmentCommitInfo.toString. (Robert Muir)
 
@@ -159,6 +205,24 @@ API Changes
 * LUCENE-6286: Removed IndexSearcher methods that take a Filter object.
   A BooleanQuery with a filter clause must be used instead. (Adrien Grand)
 
+* LUCENE-6300: PrefixFilter, TermRangeFilter and NumericRangeFilter have been
+  removed. Use PrefixQuery, TermRangeQuery and NumericRangeQuery instead.
+  (Adrien Grand)
+
+* LUCENE-6303: Replaced FilterCache with QueryCache and CachingWrapperFilter
+  with CachingWrapperQuery. (Adrien Grand)
+
+* LUCENE-6317: Deprecate DataOutput.writeStringSet and writeStringStringMap.
+  Use writeSetOfStrings/Maps instead. (Mike McCandless, Robert Muir)
+
+* LUCENE-6307: Rename SegmentInfo.getDocCount -> .maxDoc,
+  SegmentInfos.totalDocCount -> .totalMaxDoc, MergeInfo.totalDocCount
+  -> .totalMaxDoc and MergePolicy.OneMerge.totalDocCount ->
+  .totalMaxDoc (Adrien Grand, Robert Muir, Mike McCandless)
+
+* LUCENE-6367: PrefixQuery now subclasses AutomatonQuery, removing the
+  specialized PrefixTermsEnum. (Robert Muir, Mike McCandless)
+
 Other
 
 * LUCENE-6248: Remove unused odd constants from StandardSyntaxParser.jj
@@ -172,11 +236,20 @@ Other
 
 * LUCENE-6292: Seed StringHelper better. (Robert Muir)
 
+* LUCENE-6333: Refactored queries to delegate their equals and hashcode
+  impls to the super class. (Lee Hinman via Adrien Grand)
+
+* LUCENE-6343: DefaultSimilarity javadocs had the wrong float value to
+  demonstrate precision of encoded norms (András Péteri via Mike McCandless)
+
 Changes in Runtime Behavior
 
 * LUCENE-6255: PhraseQuery now ignores leading holes and requires that
   positions are positive and added in order. (Adrien Grand)
 
+* LUCENE-6298: SimpleQueryParser returns an empty query rather than
+  null, if e.g. the terms were all stopwords. (Lee Hinman via Robert Muir)
+
 ======================= Lucene 5.0.0 =======================
 
 New Features
@@ -705,6 +778,40 @@ Bug fixes
   sorted (set) doc values instance at the same time.
   (Tom Shally, Robert Muir, Adrien Grand)
 
+* LUCENE-6093: Don't throw NullPointerException from
+  BlendedInfixSuggester for lookups that do not end in a prefix
+  token. (jane chang via Mike McCandless)
+
+* LUCENE-6279: Don't let an abusive leftover _N_upgraded.si in the
+  index directory cause index corruption on upgrade (Robert Muir, Mike
+  McCandless)
+
+* LUCENE-6287: Fix concurrency bug in IndexWriter that could cause
+  index corruption (missing _N.si files) the first time 4.x kisses a
+  3.x index if merges are also running. (Simon Willnauer, Mike
+  McCandless)
+
+* LUCENE-6205: Fixed intermittent concurrency issue that could cause
+  FileNotFoundException when writing doc values updates at the same
+  time that a merge kicks off. (Mike McCandless)
+
+* LUCENE-6214: Fixed IndexWriter deadlock when one thread is
+  committing while another opens a near-real-time reader and an
+  unrecoverable (tragic) exception is hit. (Simon Willnauer, Mike
+  McCandless)
+
+* LUCENE-6105: Don't cache FST root arcs if the number of root arcs is
+  small, or if the cache would be > 20% of the size of the FST.
+  (Robert Muir, Mike McCandless)
+
+* LUCENE-6001: DrillSideways hits NullPointerException for certain
+  BooleanQuery searches. (Dragan Jotannovic, jane chang via Mike
+  McCandless)
+
+* LUCENE-6306: Merging of doc values and norms now checks whether the
+  merge was aborted so IndexWriter.rollback can more promptly abort a
+  running merge. (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-6212: Deprecate IndexWriter APIs that accept per-document Analyzer.

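Note on LUCENE-6294 (CollectorManager) in the entries above: a minimal sketch of how the new API can parallelize collection, not part of this commit. The searcher is assumed to have been built as new IndexSearcher(reader, executor) so index slices are collected concurrently, and TotalHitCountCollector is used only as a convenient per-slice collector.

    import java.util.Collection;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.CollectorManager;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TotalHitCountCollector;

    // One collector per slice, merged in reduce().
    CollectorManager<TotalHitCountCollector, Integer> manager =
        new CollectorManager<TotalHitCountCollector, Integer>() {
          @Override
          public TotalHitCountCollector newCollector() {
            return new TotalHitCountCollector();   // fresh collector per slice
          }
          @Override
          public Integer reduce(Collection<TotalHitCountCollector> collectors) {
            int total = 0;
            for (TotalHitCountCollector c : collectors) {
              total += c.getTotalHits();           // merge per-slice hit counts
            }
            return total;
          }
        };
    int totalHits = searcher.search(new TermQuery(new Term("body", "lucene")), manager);
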
@@ -181,9 +181,9 @@ public final class StemmerOverrideFilter extends TokenFilter {
         charsSpare.grow(length);
         final char[] buffer = charsSpare.chars();
         for (int i = 0; i < length; ) {
-          i += Character.toChars(
-                  Character.toLowerCase(
-                      Character.codePointAt(input, i)), buffer, i);
+          i += Character.toChars(
+              Character.toLowerCase(
+                  Character.codePointAt(input, i)), buffer, i);
         }
         spare.copyChars(buffer, 0, length);
       } else {

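Note on the StemmerOverrideFilter hunk above (apparently a whitespace-only re-indent): the loop advances by code point, not by char, so supplementary (non-BMP) characters are lowercased without corrupting surrogate pairs. A minimal standalone sketch of the same idiom, not part of this commit:

    // Character.codePointAt may read one char or a surrogate pair; toChars
    // writes the lowercased code point back and returns how many chars it wrote,
    // which is exactly how far the loop must advance.
    static char[] lowerCaseByCodePoint(CharSequence input) {
      int length = input.length();
      char[] buffer = new char[length];
      for (int i = 0; i < length; ) {
        i += Character.toChars(
            Character.toLowerCase(Character.codePointAt(input, i)), buffer, i);
      }
      return buffer;
    }
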
@@ -138,7 +138,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
       };
     }
 
-    try {
+    try (Analyzer a = analyzer) {
       String formatClass = format;
       if (format == null || format.equals("solr")) {
         formatClass = SolrSynonymParser.class.getName();
@@ -146,7 +146,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
         formatClass = WordnetSynonymParser.class.getName();
       }
       // TODO: expose dedup as a parameter?
-      map = loadSynonyms(loader, formatClass, true, analyzer);
+      map = loadSynonyms(loader, formatClass, true, a);
     } catch (ParseException e) {
       throw new IOException("Error parsing synonyms file:", e);
     }

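Note on the SynonymFilterFactory hunk above: Analyzer implements Closeable, so converting the bare try block to try-with-resources guarantees the analyzer is closed even when parsing throws. This is the same leak-avoidance pattern the rest of this commit applies throughout the tests. A hedged sketch; buildAnalyzer and parseSynonyms are illustrative names, not Lucene APIs:

    try (Analyzer a = buildAnalyzer()) {  // assumed helper returning an Analyzer
      SynonymMap map = parseSynonyms(a);  // assumed helper; may throw
      // ... use map ...
    }                                     // a.close() runs here, success or failure
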
@@ -31,7 +31,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ArabicAnalyzer();
+    new ArabicAnalyzer().close();
   }
 
   /**
@@ -53,6 +53,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
 
     assertAnalyzesTo(a, "ما ملكت أيمانكم", new String[] { "ملكت", "ايمانكم"});
     assertAnalyzesTo(a, "الذين ملكت أيمانكم", new String[] { "ملكت", "ايمانكم" }); // stopwords
+    a.close();
   }
 
   /**
@@ -62,14 +63,17 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
     ArabicAnalyzer a = new ArabicAnalyzer();
     assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
     assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
+    a.close();
   }
 
   /**
    * Non-arabic text gets treated in a similar way as SimpleAnalyzer.
    */
   public void testEnglishInput() throws Exception {
-    assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] {
+    ArabicAnalyzer a = new ArabicAnalyzer();
+    assertAnalyzesTo(a, "English text.", new String[] {
         "english", "text" });
+    a.close();
   }
 
   /**
@@ -80,6 +84,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
     ArabicAnalyzer a = new ArabicAnalyzer(set);
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
+    a.close();
   }
 
   public void testWithStemExclusionSet() throws IOException {
@@ -87,15 +92,18 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
     ArabicAnalyzer a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
-
+    a.close();
+
     a = new ArabicAnalyzer(CharArraySet.EMPTY_SET, CharArraySet.EMPTY_SET);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ArabicAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    ArabicAnalyzer a = new ArabicAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -104,6 +104,7 @@ public class TestArabicNormalizationFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
 }

@@ -141,5 +141,6 @@ public class TestArabicStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -32,24 +32,27 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new BulgarianAnalyzer();
+    new BulgarianAnalyzer().close();
   }
 
   public void testStopwords() throws IOException {
     Analyzer a = new BulgarianAnalyzer();
     assertAnalyzesTo(a, "Как се казваш?", new String[] {"казваш"});
+    a.close();
   }
 
   public void testCustomStopwords() throws IOException {
     Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET);
     assertAnalyzesTo(a, "Как се казваш?",
         new String[] {"как", "се", "казваш"});
+    a.close();
   }
 
   public void testReusableTokenStream() throws IOException {
     Analyzer a = new BulgarianAnalyzer();
     assertAnalyzesTo(a, "документи", new String[] {"документ"});
     assertAnalyzesTo(a, "документ", new String[] {"документ"});
+    a.close();
   }
 
   /**
@@ -64,6 +67,7 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "компютър", new String[] {"компютр"});
 
     assertAnalyzesTo(a, "градове", new String[] {"град"});
+    a.close();
   }
 
   public void testWithStemExclusionSet() throws IOException {
@@ -71,10 +75,13 @@ public class TestBulgarianAnalyzer extends BaseTokenStreamTestCase {
     set.add("строеве");
     Analyzer a = new BulgarianAnalyzer(CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new BulgarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    BulgarianAnalyzer a = new BulgarianAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -97,6 +97,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "братя", new String[] {"брат"});
     assertAnalyzesTo(a, "братята", new String[] {"брат"});
     assertAnalyzesTo(a, "брате", new String[] {"брат"});
+
+    a.close();
   }
 
   /**
@@ -109,6 +111,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "вестта", new String[] {"вест"});
     assertAnalyzesTo(a, "вести", new String[] {"вест"});
     assertAnalyzesTo(a, "вестите", new String[] {"вест"});
+
+    a.close();
   }
 
   /**
@@ -138,6 +142,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "изключенията", new String[] {"изключени"});
     /* note the below form in this example does not conflate with the rest */
     assertAnalyzesTo(a, "изключения", new String[] {"изключн"});
+
+    a.close();
   }
 
   /**
@@ -154,6 +160,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "красивото", new String[] {"красив"});
     assertAnalyzesTo(a, "красиви", new String[] {"красив"});
     assertAnalyzesTo(a, "красивите", new String[] {"красив"});
+    a.close();
   }
 
   /**
@@ -212,6 +219,8 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
     /* note the below forms conflate with each other, but not the rest */
     assertAnalyzesTo(a, "строя", new String[] {"стр"});
     assertAnalyzesTo(a, "строят", new String[] {"стр"});
+
+    a.close();
   }
 
   public void testWithKeywordAttribute() throws IOException {
@@ -234,5 +243,6 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -135,12 +135,14 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
     checkReuse(a, "boainain", "boainain");
     checkReuse(a, "boas", "boas");
     checkReuse(a, "bôas", "boas"); // removes diacritic: different from snowball portugese
+    a.close();
   }
 
   public void testStemExclusionTable() throws Exception {
     BrazilianAnalyzer a = new BrazilianAnalyzer(
         CharArraySet.EMPTY_SET, new CharArraySet(asSet("quintessência"), false));
     checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
+    a.close();
   }
 
   public void testWithKeywordAttribute() throws IOException {
@@ -154,7 +156,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
   }
 
   private void check(final String input, final String expected) throws Exception {
-    checkOneTerm(new BrazilianAnalyzer(), input, expected);
+    BrazilianAnalyzer a = new BrazilianAnalyzer();
+    checkOneTerm(a, input, expected);
+    a.close();
   }
 
   private void checkReuse(Analyzer a, String input, String expected) throws Exception {
@@ -163,7 +167,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new BrazilianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    BrazilianAnalyzer a = new BrazilianAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -175,5 +181,6 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new CatalanAnalyzer();
+    new CatalanAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "llengua", "llengu");
     // stopword
     assertAnalyzesTo(a, "un", new String[] { });
+    a.close();
   }
 
   /** test use of elisionfilter */
@@ -45,6 +46,7 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
     Analyzer a = new CatalanAnalyzer();
     assertAnalyzesTo(a, "Diccionari de l'Institut d'Estudis Catalans",
         new String[] { "diccion", "inst", "estud", "catalan" });
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -53,10 +55,13 @@ public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
     Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "llengües", "llengües");
     checkOneTerm(a, "llengua", "llengu");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new CatalanAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    CatalanAnalyzer a = new CatalanAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -25,7 +25,6 @@ import java.io.StringReader;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -403,16 +402,22 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
 
   public void testRandom() throws Exception {
     int numRounds = RANDOM_MULTIPLIER * 1000;
-    checkRandomData(random(), newTestAnalyzer(), numRounds);
+    Analyzer a = newTestAnalyzer();
+    checkRandomData(random(), a, numRounds);
+    a.close();
   }
 
   public void testRandomHugeStrings() throws Exception {
     int numRounds = RANDOM_MULTIPLIER * 100;
-    checkRandomData(random(), newTestAnalyzer(), numRounds, 8192);
+    Analyzer a = newTestAnalyzer();
+    checkRandomData(random(), a, numRounds, 8192);
+    a.close();
   }
 
   public void testCloseBR() throws Exception {
-    checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), " Secretary)</br> [[M");
+    Analyzer a = newTestAnalyzer();
+    checkAnalysisConsistency(random(), a, random().nextBoolean(), " Secretary)</br> [[M");
+    a.close();
   }
 
   public void testServerSideIncludes() throws Exception {
@@ -549,7 +554,9 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
   public void testRandomBrokenHTML() throws Exception {
     int maxNumElements = 10000;
     String text = TestUtil.randomHtmlishString(random(), maxNumElements);
-    checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), text);
+    Analyzer a = newTestAnalyzer();
+    checkAnalysisConsistency(random(), a, random().nextBoolean(), text);
+    a.close();
   }
 
   public void testRandomText() throws Exception {
@@ -617,6 +624,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
     assertAnalyzesTo(analyzer, " �", new String[] { "\uFFFD" } );
     assertAnalyzesTo(analyzer, " �", new String[] { "\uFFFD" } );
     assertAnalyzesTo(analyzer, " �<br>", new String[] { "�" } );
+    analyzer.close();
   }
 
 

@@ -216,6 +216,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
 
     int numRounds = RANDOM_MULTIPLIER * 10000;
     checkRandomData(random(), analyzer, numRounds);
+    analyzer.close();
   }
 
   //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
@@ -242,6 +243,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
 
     String text = "gzw f quaxot";
     checkAnalysisConsistency(random(), analyzer, false, text);
+    analyzer.close();
   }
 
   //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
@@ -263,6 +265,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     };
     int numRounds = 100;
     checkRandomData(random(), analyzer, numRounds);
+    analyzer.close();
   }
 }

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.cjk;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -39,7 +38,19 @@ import org.apache.lucene.analysis.util.CharArraySet;
  * Most tests adopted from TestCJKTokenizer
  */
 public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new CJKAnalyzer();
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new CJKAnalyzer();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   public void testJa1() throws IOException {
     assertAnalyzesTo(analyzer, "一二三四五六七八九十",
@@ -228,6 +239,8 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
     // before bigramming, the 4 tokens look like:
     //   { 0, 0, 1, 1 },
     //   { 0, 1, 1, 2 }
+
+    analyzer.close();
   }
 
   private static class FakeStandardTokenizer extends TokenFilter {
@@ -267,17 +280,21 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
         new int[] { 1 },
         new String[] { "<SINGLE>" },
         new int[] { 1 });
+    analyzer.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new CJKAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new CJKAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomHugeStrings() throws Exception {
-    Random random = random();
-    checkRandomData(random, new CJKAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
+    Analyzer a = new CJKAnalyzer();
+    checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -289,5 +306,6 @@ public class TestCJKAnalyzer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -17,31 +17,42 @@ package org.apache.lucene.analysis.cjk;
  * limitations under the License.
  */
 
 import java.io.Reader;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.IOUtils;
 
 public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
-  Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer t = new StandardTokenizer();
-      return new TokenStreamComponents(t, new CJKBigramFilter(t));
-    }
-  };
-
-  Analyzer unibiAnalyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer t = new StandardTokenizer();
-      return new TokenStreamComponents(t,
-          new CJKBigramFilter(t, 0xff, true));
-    }
-  };
+  Analyzer analyzer, unibiAnalyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer t = new StandardTokenizer();
+        return new TokenStreamComponents(t, new CJKBigramFilter(t));
+      }
+    };
+    unibiAnalyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer t = new StandardTokenizer();
+        return new TokenStreamComponents(t,
+            new CJKBigramFilter(t, 0xff, true));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    IOUtils.close(analyzer, unibiAnalyzer);
+    super.tearDown();
+  }
 
   public void testHuge() throws Exception {
     assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@@ -79,6 +90,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
           "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
         new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
         new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
+    a.close();
   }
 
   public void testAllScripts() throws Exception {
@@ -92,6 +104,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
     };
     assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
         new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
+    a.close();
   }
 
   public void testUnigramsAndBigramsAllScripts() throws Exception {
@@ -132,6 +145,7 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
           "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
         new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
         new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
+    a.close();
   }
 
   public void testUnigramsAndBigramsHuge() throws Exception {

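Note on the recurring test refactor, seen in TestCJKBigramFilter above: analyzers move from field initializers into setUp() and are closed in tearDown(), with IOUtils.close(Closeable...) attempting to close every argument and rethrowing the first failure so one bad close() cannot leak the rest. A minimal sketch of the lifecycle, not part of this commit; SomeAnalyzerTest is a hypothetical name:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.core.SimpleAnalyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.util.IOUtils;

    public class SomeAnalyzerTest extends BaseTokenStreamTestCase {
      Analyzer one, two;

      @Override
      public void setUp() throws Exception {
        super.setUp();                // framework setup runs first
        one = new WhitespaceAnalyzer();
        two = new SimpleAnalyzer();
      }

      @Override
      public void tearDown() throws Exception {
        IOUtils.close(one, two);      // closes both even if the first throws
        super.tearDown();             // framework teardown runs last
      }
    }
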
@@ -29,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Tests for {@link CJKWidthFilter}
  */
 public class TestCJKWidthFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new CJKWidthFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new CJKWidthFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   /**
    * Full-width ASCII forms normalized to half-width (basic latin)
@@ -74,5 +86,6 @@ public class TestCJKWidthFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -32,24 +32,27 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new SoraniAnalyzer();
+    new SoraniAnalyzer().close();
   }
 
   public void testStopwords() throws IOException {
     Analyzer a = new SoraniAnalyzer();
     assertAnalyzesTo(a, "ئەم پیاوە", new String[] {"پیاو"});
+    a.close();
   }
 
   public void testCustomStopwords() throws IOException {
     Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET);
     assertAnalyzesTo(a, "ئەم پیاوە",
         new String[] {"ئەم", "پیاو"});
+    a.close();
   }
 
   public void testReusableTokenStream() throws IOException {
     Analyzer a = new SoraniAnalyzer();
     assertAnalyzesTo(a, "پیاوە", new String[] {"پیاو"});
     assertAnalyzesTo(a, "پیاو", new String[] {"پیاو"});
+    a.close();
   }
 
   public void testWithStemExclusionSet() throws IOException {
@@ -57,10 +60,13 @@ public class TestSoraniAnalyzer extends BaseTokenStreamTestCase {
     set.add("پیاوە");
     Analyzer a = new SoraniAnalyzer(CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "پیاوە", new String[] { "پیاوە" });
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new SoraniAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new SoraniAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ckb;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -30,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Tests normalization for Sorani (this is more critical than stemming...)
  */
 public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
-      return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer));
-    }
-  };
+  Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
+        return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }
 
   public void testY() throws Exception {
     checkOneTerm(a, "\u064A", "\u06CC");
@@ -96,5 +107,6 @@ public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ckb;
 import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -31,7 +30,19 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Test the Sorani Stemmer.
  */
 public class TestSoraniStemFilter extends BaseTokenStreamTestCase {
-  SoraniAnalyzer a = new SoraniAnalyzer();
+  Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new SoraniAnalyzer();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }
 
   public void testIndefiniteSingular() throws Exception {
     checkOneTerm(a, "پیاوێک", "پیاو"); // -ek
@@ -90,6 +101,7 @@ public class TestSoraniStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   /** test against a basic vocabulary file */

@@ -156,6 +156,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
         new String[] { "s_s", "s_s" });
     assertAnalyzesTo(a, "of the of",
         new String[] { "of_the", "the_of" });
+    a.close();
   }
 
   public void testCommonGramsFilter() throws Exception {
@@ -242,6 +243,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "of the of",
         new String[] { "of", "of_the", "the", "the_of", "of" },
         new int[] { 1, 0, 1, 0, 1 });
+    a.close();
   }
 
   /**
@@ -330,6 +332,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
     };
 
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
 
     Analyzer b = new Analyzer() {
 
@@ -342,5 +345,6 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
     };
 
     checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
+    b.close();
   }
 }

@@ -336,6 +336,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         new String[] { "bankueberfall", "fall" },
         new int[] { 0, 0 },
         new int[] { 12, 12 });
+    analyzer.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -350,6 +351,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
@@ -363,6 +365,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
+    b.close();
   }
 
   public void testEmptyTerm() throws Exception {
@@ -376,6 +379,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
 
     InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
     final HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
@@ -389,5 +393,6 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(b, "", "");
+    b.close();
   }
 }

@@ -18,15 +18,18 @@ package org.apache.lucene.analysis.core;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;
-import java.util.Random;
 
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 public class TestAnalyzers extends BaseTokenStreamTestCase {
 
@@ -48,6 +51,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
         new String[] { "b" });
     assertAnalyzesTo(a, "\"QUOTED\" word",
         new String[] { "quoted", "word" });
+    a.close();
   }
 
   public void testNull() throws Exception {
@@ -68,6 +72,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
         new String[] { "2B" });
     assertAnalyzesTo(a, "\"QUOTED\" word",
         new String[] { "\"QUOTED\"", "word" });
+    a.close();
   }
 
   public void testStop() throws Exception {
@@ -76,6 +81,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
         new String[] { "foo", "bar", "foo", "bar" });
     assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
         new String[] { "foo", "bar", "foo", "bar" });
+    a.close();
   }
 
   void verifyPayload(TokenStream ts) throws IOException {
@@ -159,6 +165,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     // unpaired trail surrogate
     assertAnalyzesTo(a, "AbaC\uDC16AdaBa",
         new String [] { "abac\uDC16adaba" });
+    a.close();
   }
 
   /**
@@ -179,9 +186,9 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     // unpaired trail surrogate
     assertAnalyzesTo(a, "AbaC\uDC16AdaBa",
         new String [] { "ABAC\uDC16ADABA" });
+    a.close();
   }
 
-
   /**
    * Test that LowercaseFilter handles the lowercasing correctly if the term
    * buffer has a trailing surrogate character leftover and the current term in
@@ -223,17 +230,20 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new WhitespaceAnalyzer(), 1000*RANDOM_MULTIPLIER);
-    checkRandomData(random(), new SimpleAnalyzer(), 1000*RANDOM_MULTIPLIER);
-    checkRandomData(random(), new StopAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer() };
+    for (Analyzer analyzer : analyzers) {
+      checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    }
+    IOUtils.close(analyzers);
   }
 
   /** blast some random large strings through the analyzer */
   public void testRandomHugeStrings() throws Exception {
-    Random random = random();
-    checkRandomData(random, new WhitespaceAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
-    checkRandomData(random, new SimpleAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
-    checkRandomData(random, new StopAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
+    Analyzer analyzers[] = new Analyzer[] { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StopAnalyzer() };
+    for (Analyzer analyzer : analyzers) {
+      checkRandomData(random(), analyzer, 100*RANDOM_MULTIPLIER, 8192);
+    }
+    IOUtils.close(analyzers);
   }
 }

@@ -75,6 +75,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
       }
     };
     checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
+    a.close();
   }
 
   CharFilter wrappedStream = new CharFilter(new StringReader("bogus")) {
@@ -261,6 +262,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), analyzer, 2000);
+    analyzer.close();
   }
 
   public void testCuriousWikipediaString() throws Exception {
@@ -285,5 +287,6 @@ public class TestBugInSomething extends BaseTokenStreamTestCase {
       }
     };
     checkAnalysisConsistency(random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
+    a.close();
   }
 }

@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -78,6 +79,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
       assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                    right.tokenStream("foo", newStringReader(s)));
     }
+    IOUtils.close(left, right);
   }
 
   // not so useful since it's all one token?!
@@ -99,6 +101,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
       assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                    right.tokenStream("foo", newStringReader(s)));
     }
+    IOUtils.close(left, right);
   }
 
   public void testLetterHtmlish() throws Exception {
@@ -116,6 +119,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
      assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                   right.tokenStream("foo", newStringReader(s)));
    }
+    IOUtils.close(left, right);
   }
 
   public void testLetterHtmlishHuge() throws Exception {
@@ -136,6 +140,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
       assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                    right.tokenStream("foo", newStringReader(s)));
     }
+    IOUtils.close(left, right);
   }
 
   public void testLetterUnicode() throws Exception {
@@ -153,6 +158,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
       assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                    right.tokenStream("foo", newStringReader(s)));
     }
+    IOUtils.close(left, right);
   }
 
   public void testLetterUnicodeHuge() throws Exception {
@@ -173,6 +179,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
       assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                    right.tokenStream("foo", newStringReader(s)));
     }
+    IOUtils.close(left, right);
   }
 
   // we only check a few core attributes here.

@@ -46,6 +46,8 @@ import org.apache.lucene.util.Version;
 // TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories
 // to an integration test module that sucks in all analysis modules.
 // currently the only way to do this is via eclipse etc (LUCENE-3974)
+
+// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
 public class TestFactories extends BaseTokenStreamTestCase {
   public void test() throws IOException {
     for (String tokenizer : TokenizerFactory.availableTokenizers()) {
@@ -77,7 +79,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
 
       // beast it just a little, it shouldnt throw exceptions:
       // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 20, 20, false, false);
+      Analyzer a = new FactoryAnalyzer(factory, null, null);
+      checkRandomData(random(), a, 20, 20, false, false);
+      a.close();
     }
   }
 
@@ -97,7 +101,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
 
       // beast it just a little, it shouldnt throw exceptions:
       // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 20, 20, false, false);
+      Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
+      checkRandomData(random(), a, 20, 20, false, false);
+      a.close();
     }
   }
 
@@ -117,7 +123,9 @@ public class TestFactories extends BaseTokenStreamTestCase {
 
       // beast it just a little, it shouldnt throw exceptions:
      // (it should have thrown them in initialize)
-      checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 20, 20, false, false);
+      Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
+      checkRandomData(random(), a, 20, 20, false, false);
+      a.close();
     }
   }
 

@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
 
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -33,23 +34,24 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 
 public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
 
   private Directory directory;
-  private IndexSearcher searcher;
   private IndexReader reader;
+  private Analyzer analyzer;
 
   @Override
   public void setUp() throws Exception {
     super.setUp();
     directory = newDirectory();
-    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new SimpleAnalyzer()));
+    analyzer = new SimpleAnalyzer();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
 
     Document doc = new Document();
     doc.add(new StringField("partnum", "Q36", Field.Store.YES));
@@ -59,13 +61,11 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
     writer.close();
 
     reader = DirectoryReader.open(directory);
-    searcher = newSearcher(reader);
   }
 
   @Override
   public void tearDown() throws Exception {
-    reader.close();
-    directory.close();
+    IOUtils.close(analyzer, reader, directory);
     super.tearDown();
   }
 
@@ -86,7 +86,8 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
 
   public void testMutipleDocument() throws Exception {
     RAMDirectory dir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
+    Analyzer analyzer = new KeywordAnalyzer();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
     Document doc = new Document();
     doc.add(new TextField("partnum", "Q36", Field.Store.YES));
     writer.addDocument(doc);
@@ -112,11 +113,13 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
                                                 null,
                                                 0);
     assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+    analyzer.close();
   }
 
   // LUCENE-1441
   public void testOffsets() throws Exception {
-    try (TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"))) {
+    try (Analyzer analyzer = new KeywordAnalyzer();
+         TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd"))) {
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       stream.reset();
       assertTrue(stream.incrementToken());
@@ -129,6 +132,8 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new KeywordAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new KeywordAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

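Note on the TestKeywordAnalyzer testOffsets hunk above: two resources share one try, and try-with-resources closes them in reverse declaration order, so the TokenStream is closed before the Analyzer that produced it. A minimal sketch of the full TokenStream consume contract, not part of this commit:

    try (Analyzer analyzer = new KeywordAnalyzer();
         TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd"))) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();                        // required before incrementToken()
      while (stream.incrementToken()) {
        System.out.println(term.toString()); // KeywordAnalyzer emits "abcd" as one token
      }
      stream.end();                          // records final offset state
    }
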
@@ -901,16 +901,17 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
     Random random = random();
     for (int i = 0; i < numIterations; i++) {
-      MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
-      if (VERBOSE) {
-        System.out.println("Creating random analyzer:" + a);
-      }
-      try {
-        checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
-                        false /* We already validate our own offsets... */);
-      } catch (Throwable e) {
-        System.err.println("Exception from random analyzer: " + a);
-        throw e;
+      try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
+        if (VERBOSE) {
+          System.out.println("Creating random analyzer:" + a);
+        }
+        try {
+          checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
+                          false /* We already validate our own offsets... */);
+        } catch (Throwable e) {
+          System.err.println("Exception from random analyzer: " + a);
+          throw e;
+        }
       }
     }
   }
@@ -920,16 +921,17 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
     Random random = random();
     for (int i = 0; i < numIterations; i++) {
-      MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
-      if (VERBOSE) {
-        System.out.println("Creating random analyzer:" + a);
-      }
-      try {
-        checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 80, false,
-                        false /* We already validate our own offsets... */);
-      } catch (Throwable e) {
-        System.err.println("Exception from random analyzer: " + a);
-        throw e;
+      try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
+        if (VERBOSE) {
+          System.out.println("Creating random analyzer:" + a);
+        }
+        try {
+          checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 80, false,
+                          false /* We already validate our own offsets... */);
+        } catch (Throwable e) {
+          System.err.println("Exception from random analyzer: " + a);
+          throw e;
+        }
       }
     }
   }

@@ -30,7 +30,7 @@ import java.util.HashSet;

 public class TestStopAnalyzer extends BaseTokenStreamTestCase {

-  private StopAnalyzer stop = new StopAnalyzer();
+  private StopAnalyzer stop;
   private Set<Object> inValidTokens = new HashSet<>();

   @Override

@@ -41,6 +41,13 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     while(it.hasNext()) {
       inValidTokens.add(it.next());
     }
+    stop = new StopAnalyzer();
   }

+  @Override
+  public void tearDown() throws Exception {
+    stop.close();
+    super.tearDown();
+  }
+
   public void testDefaults() throws IOException {

@@ -71,6 +78,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
       }
       stream.end();
     }
+    newStop.close();
   }

   public void testStopListPositions() throws IOException {

@@ -92,6 +100,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
       }
       stream.end();
     }
+    newStop.close();
   }

 }
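TestStopAnalyzer introduces the third pattern, repeated in most of the stem-filter tests below: the analyzer moves out of a field initializer into setUp(), with a matching close() in tearDown(), so each test method gets a fresh instance and nothing outlives the test. The skeleton, under the assumption of a hypothetical class name (the super-call ordering follows LuceneTestCase's contract: framework first on the way in, last on the way out):

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.StopAnalyzer;

public class LifecycleSketch extends BaseTokenStreamTestCase {
  private StopAnalyzer stop;

  @Override
  public void setUp() throws Exception {
    super.setUp();              // framework state first
    stop = new StopAnalyzer();  // then per-test resources
  }

  @Override
  public void tearDown() throws Exception {
    stop.close();               // release per-test resources first
    super.tearDown();           // then framework teardown
  }
}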
@@ -62,6 +62,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "föó bär FÖÖ BAR",
         new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
         new int[] { 1, 0, 1, 0, 1, 0, 1});
+    a.close();
   }

   public void testHtmlStripClassicFolding() throws Exception {

@@ -93,6 +94,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "<p><b>föó</b> bär FÖÖ BAR</p>",
         new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
         new int[] { 1, 0, 1, 0, 1, 0, 1});
+    a.close();
   }

   public void testStopWordsFromClasspath() throws Exception {

@@ -114,6 +116,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     assertSame(Version.LATEST, a.getVersion());

     assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
+    a.close();
   }

   public void testStopWordsFromClasspathWithMap() throws Exception {

@@ -141,6 +144,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     } catch (IllegalArgumentException | UnsupportedOperationException e) {
       // pass
     }
+    a.close();
   }

   public void testStopWordsFromFile() throws Exception {

@@ -152,6 +156,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
             "format", "wordset")
         .build();
     assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
+    a.close();
   }

   public void testStopWordsFromFileAbsolute() throws Exception {

@@ -163,6 +168,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
             "format", "wordset")
         .build();
     assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
+    a.close();
   }

   // Now test misconfigurations:

@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cz;
  */

 import java.io.IOException;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.util.CharArraySet;

@@ -31,15 +32,24 @@ import org.apache.lucene.analysis.util.CharArraySet;
  */
 public class TestCzechAnalyzer extends BaseTokenStreamTestCase {

+  /** This test fails with NPE when the
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new CzechAnalyzer().close();
+  }
+
   public void testStopWord() throws Exception {
-    assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem",
+    Analyzer analyzer = new CzechAnalyzer();
+    assertAnalyzesTo(analyzer, "Pokud mluvime o volnem",
         new String[] { "mluvim", "voln" });
+    analyzer.close();
   }

   public void testReusableTokenStream() throws Exception {
     Analyzer analyzer = new CzechAnalyzer();
     assertAnalyzesTo(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
     assertAnalyzesTo(analyzer, "Česká Republika", new String[] { "česk", "republik" });
+    analyzer.close();
   }

   public void testWithStemExclusionSet() throws IOException{

@@ -47,10 +57,13 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
     set.add("hole");
     CzechAnalyzer cz = new CzechAnalyzer(CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
+    cz.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new CzechAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new CzechAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.cz;
  */

 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;

 import org.apache.lucene.analysis.Analyzer;

@@ -103,6 +102,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(cz, "soudcům", new String[] { "soudk" });
     assertAnalyzesTo(cz, "soudcích", new String[] { "soudk" });
     assertAnalyzesTo(cz, "soudcem", new String[] { "soudk" });
+
+    cz.close();
   }

   /**

@@ -147,6 +148,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(cz, "ženách", new String[] { "žn" });
     assertAnalyzesTo(cz, "ženou", new String[] { "žn" });
     assertAnalyzesTo(cz, "ženami", new String[] { "žn" });
+
+    cz.close();
   }

   /**

@@ -189,7 +192,9 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(cz, "stavení", new String[] { "stavn" });
     assertAnalyzesTo(cz, "stavením", new String[] { "stavn" });
     assertAnalyzesTo(cz, "staveních", new String[] { "stavn" });
     assertAnalyzesTo(cz, "staveními", new String[] { "stavn" });
     assertAnalyzesTo(cz, "staveními", new String[] { "stavn" });
+
+    cz.close();
   }

   /**

@@ -218,6 +223,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(cz, "jarnímu", new String[] { "jarn" });
     assertAnalyzesTo(cz, "jarním", new String[] { "jarn" });
     assertAnalyzesTo(cz, "jarními", new String[] { "jarn" });
+
+    cz.close();
   }

   /**

@@ -227,6 +234,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     CzechAnalyzer cz = new CzechAnalyzer();
     assertAnalyzesTo(cz, "Karlův", new String[] { "karl" });
     assertAnalyzesTo(cz, "jazykový", new String[] { "jazyk" });
+    cz.close();
   }

   /**

@@ -267,6 +275,8 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     /* rewrite of e* -> * */
     assertAnalyzesTo(cz, "deska", new String[] { "desk" });
     assertAnalyzesTo(cz, "desek", new String[] { "desk" });
+
+    cz.close();
   }

   /**

@@ -276,6 +286,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     CzechAnalyzer cz = new CzechAnalyzer();
     assertAnalyzesTo(cz, "e", new String[] { "e" });
     assertAnalyzesTo(cz, "zi", new String[] { "zi" });
+    cz.close();
   }

   public void testWithKeywordAttribute() throws IOException {

@@ -297,6 +308,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }

 }

@@ -27,7 +27,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new DanishAnalyzer();
+    new DanishAnalyzer().close();
   }

   /** test stopwords and stemming */

@@ -38,6 +38,7 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "undersøgelse", "undersøg");
     // stopword
     assertAnalyzesTo(a, "på", new String[] {});
+    a.close();
   }

   /** test use of exclusion set */

@@ -47,10 +48,13 @@ public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
         DanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "undersøgelse", "undersøgelse");
     checkOneTerm(a, "undersøg", "undersøg");
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new DanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new DanishAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -32,6 +32,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "Tisch", "tisch");
     checkOneTerm(a, "Tische", "tisch");
     checkOneTerm(a, "Tischen", "tisch");
+    a.close();
   }

   public void testWithKeywordAttribute() throws IOException {

@@ -48,6 +49,7 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
     GermanAnalyzer a = new GermanAnalyzer( CharArraySet.EMPTY_SET,
         new CharArraySet( asSet("tischen"), false));
     checkOneTerm(a, "tischen", "tischen");
+    a.close();
   }

   /** test some features of the new snowball filter

@@ -58,10 +60,13 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
     // a/o/u + e is equivalent to the umlaut form
     checkOneTerm(a, "Schaltflächen", "schaltflach");
     checkOneTerm(a, "Schaltflaechen", "schaltflach");
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new GermanAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    GermanAnalyzer a = new GermanAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link GermanLightStemFilter}
  */
 public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new GermanLightStemFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new GermanLightStemFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   /** Test against a vocabulary from the reference impl */
   public void testVocabulary() throws IOException {

@@ -59,6 +70,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "sängerinnen", "sängerinnen");
+    a.close();
   }

   /** blast some random strings through the analyzer */

@@ -75,5 +87,6 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link GermanMinimalStemFilter}
  */
 public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   /** Test some examples from the paper */
   public void testExamples() throws IOException {

@@ -66,6 +77,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "sängerinnen", "sängerinnen");
+    a.close();
   }

   /** Test against a vocabulary from the reference impl */

@@ -87,5 +99,6 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.de;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -31,14 +30,26 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Tests {@link GermanNormalizationFilter}
  */
 public class TestGermanNormalizationFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String field) {
-      final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      final TokenStream stream = new GermanNormalizationFilter(tokenizer);
-      return new TokenStreamComponents(tokenizer, stream);
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String field) {
+        final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        final TokenStream stream = new GermanNormalizationFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, stream);
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   /**
    * Tests that a/o/u + e is equivalent to the umlaut form

@@ -76,5 +87,6 @@ public class TestGermanNormalizationFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;

@@ -39,14 +40,26 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  *
  */
 public class TestGermanStemFilter extends BaseTokenStreamTestCase {
-  Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false);
-      return new TokenStreamComponents(t,
-          new GermanStemFilter(new LowerCaseFilter(t)));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false);
+        return new TokenStreamComponents(t,
+            new GermanStemFilter(new LowerCaseFilter(t)));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   public void testStemming() throws Exception {
     InputStream vocOut = getClass().getResourceAsStream("data.txt");

@@ -65,6 +78,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "sängerinnen", "sängerinnen");
+    a.close();
   }

   /** blast some random strings through the analyzer */

@@ -81,5 +95,6 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -45,6 +45,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
     // as well as the elimination of stop words
     assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
         new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
+    a.close();
   }

   public void testReusableTokenStream() throws Exception {

@@ -62,10 +63,13 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
     // as well as the elimination of stop words
     assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
         new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new GreekAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new GreekAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -26,7 +25,19 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;

 public class TestGreekStemmer extends BaseTokenStreamTestCase {
-  Analyzer a = new GreekAnalyzer();
+  private Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new GreekAnalyzer();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }

   public void testMasculineNouns() throws Exception {
     // -ος

@@ -537,5 +548,6 @@ public class TestGreekStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }
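The other hunk shape that recurs through these files is testEmptyTerm: a throwaway analyzer over a KeywordTokenizer checks that stemming the empty string neither throws nor produces a bogus token, and the commit adds the now-required close() at the end. A representative sketch (the GreekStemFilter wrapping is one concrete choice matching the file above; any stem filter in this diff slots into the same frame):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.el.GreekStemFilter;

public void testEmptyTermSketch() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer(); // whole input as one token
      return new TokenStreamComponents(tokenizer, new GreekStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", ""); // empty input must round-trip as the empty term
  a.close(); // the line this commit adds to every such test
}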
@@ -27,7 +27,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new EnglishAnalyzer();
+    new EnglishAnalyzer().close();
   }

   /** test stopwords and stemming */

@@ -42,6 +42,7 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "steven's", "steven");
     checkOneTerm(a, "steven\u2019s", "steven");
     checkOneTerm(a, "steven\uFF07s", "steven");
+    a.close();
   }

   /** test use of exclusion set */

@@ -51,10 +52,13 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
         EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "books", "books");
     checkOneTerm(a, "book", "book");
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new EnglishAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new EnglishAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.en;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -30,14 +29,26 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Simple tests for {@link EnglishMinimalStemFilter}
  */
 public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   /** Test some examples from various papers about this technique */
   public void testExamples() throws IOException {
     checkOneTerm(analyzer, "queries", "query");

@@ -65,5 +76,6 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.en;
 import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -32,13 +31,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Tests for {@link KStemmer}
  */
 public class TestKStemmer extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
-      return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
-    }
-  };
+  private Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
+        return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {

@@ -63,6 +74,7 @@ public class TestKStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }

   /****** requires original java kstem source code to create map

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.en;
  */

 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -36,13 +35,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Test the PorterStemFilter with Martin Porter's test data.
  */
 public class TestPorterStemFilter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer t = new MockTokenizer( MockTokenizer.KEYWORD, false);
-      return new TokenStreamComponents(t, new PorterStemFilter(t));
-    }
-  };
+  private Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer t = new MockTokenizer( MockTokenizer.KEYWORD, false);
+        return new TokenStreamComponents(t, new PorterStemFilter(t));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }

   /**
    * Run the stemmer against all strings in voc.txt

@@ -75,5 +86,6 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new SpanishAnalyzer();
+    new SpanishAnalyzer().close();
   }

   /** test stopwords and stemming */

@@ -38,6 +38,7 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "chicano", "chican");
     // stopword
     assertAnalyzesTo(a, "los", new String[] {});
+    a.close();
   }

   /** test use of exclusion set */

@@ -47,10 +48,13 @@ public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
         SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "chicana", "chican");
     checkOneTerm(a, "chicano", "chicano");
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new SpanishAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new SpanishAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.es;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

@@ -32,14 +31,26 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link SpanishLightStemFilter}
  */
 public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   /** Test against a vocabulary from the reference impl */
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataPath("eslighttestdata.zip"), "eslight.txt");

@@ -59,5 +70,6 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new BasqueAnalyzer();
+    new BasqueAnalyzer().close();
   }

   /** test stopwords and stemming */

@@ -38,6 +38,7 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "zaldiak", "zaldi");
     // stopword
     assertAnalyzesTo(a, "izan", new String[] { });
+    a.close();
   }

   /** test use of exclusion set */

@@ -47,10 +48,13 @@ public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
         BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "zaldiak", "zaldiak");
     checkOneTerm(a, "mendiari", "mendi");
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new BasqueAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer a = new BasqueAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -31,7 +31,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new PersianAnalyzer();
+    new PersianAnalyzer().close();
   }

   /**

@@ -105,6 +105,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {

     // active present subjunctive
     assertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
+    a.close();
   }

   /**

@@ -181,6 +182,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {

     // active present subjunctive
     assertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
+    a.close();
   }

   /**

@@ -192,6 +194,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
     Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
     assertAnalyzesTo(a, "برگها", new String[] { "برگ" });
+    a.close();
   }

   /**

@@ -201,6 +204,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
   public void testBehaviorNonPersian() throws Exception {
     Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
+    a.close();
   }

   /**

@@ -210,6 +214,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
     Analyzer a = new PersianAnalyzer();
     assertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
     assertAnalyzesTo(a, "برگها", new String[] { "برگ" });
+    a.close();
   }

   /**

@@ -220,10 +225,13 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
         new CharArraySet( asSet("the", "and", "a"), false));
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
+    a.close();
   }

   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new PersianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    PersianAnalyzer a = new PersianAnalyzer();
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -24,17 +24,29 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;

 public class TestPersianCharFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      return new TokenStreamComponents(new MockTokenizer());
-    }
-
-    @Override
-    protected Reader initReader(String fieldName, Reader reader) {
-      return new PersianCharFilter(reader);
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer());
+      }
+
+      @Override
+      protected Reader initReader(String fieldName, Reader reader) {
+        return new PersianCharFilter(reader);
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }

   public void testBasics() throws Exception {
     assertAnalyzesTo(analyzer, "this is a\u200Ctest",
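TestPersianCharFilter's anonymous analyzer overrides two hooks, and both move into setUp() together: createComponents assembles the tokenizer chain, while initReader wraps the raw input so the char filter rewrites characters before tokenization ever sees them. A minimal sketch of that pairing, matching the hunk above (MockTokenizer stands in for a real tokenizer, as in the test itself):

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.fa.PersianCharFilter;

Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // token stage: runs on the already char-filtered text
    return new TokenStreamComponents(new MockTokenizer());
  }

  @Override
  protected Reader initReader(String fieldName, Reader reader) {
    // character stage: applied to the raw Reader before tokenization
    return new PersianCharFilter(reader);
  }
};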
@ -18,8 +18,6 @@ package org.apache.lucene.analysis.fa;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
@ -73,6 +71,7 @@ public class TestPersianNormalizationFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new FinnishAnalyzer();
|
||||
new FinnishAnalyzer().close();
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
|
@ -38,6 +38,7 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(a, "edeltäjistään", "edeltäj");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "olla", new String[] {});
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
|
@ -47,10 +48,13 @@ public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
|
|||
FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTerm(a, "edeltäjiinsä", "edeltäj");
|
||||
checkOneTerm(a, "edeltäjistään", "edeltäjistään");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new FinnishAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer a = new FinnishAnalyzer();
|
||||
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
|
|||
* Simple tests for {@link FinnishLightStemFilter}
|
||||
*/
|
||||
public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
|
||||
private Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
private Analyzer analyzer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** Test against a vocabulary from the reference impl */
|
||||
public void testVocabulary() throws IOException {
|
||||
|
@ -58,6 +70,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "edeltäjistään", "edeltäjistään");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
|
@ -74,5 +87,6 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
fa,
|
||||
"33Bis 1940-1945 1940:1945 (---i+++)*",
|
||||
new String[] { "33bi", "1940", "1945", "1940", "1945", "i" });
|
||||
|
||||
fa.close();
|
||||
}
|
||||
|
||||
public void testReusableTokenStream() throws Exception {
|
||||
|
@ -132,6 +132,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
"chist",
|
||||
"element",
|
||||
"captif" });
|
||||
fa.close();
|
||||
}
|
||||
|
||||
public void testExclusionTableViaCtor() throws Exception {
|
||||
|
@ -141,15 +142,18 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
CharArraySet.EMPTY_SET, set);
|
||||
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
|
||||
"chist" });
|
||||
fa.close();
|
||||
|
||||
fa = new FrenchAnalyzer( CharArraySet.EMPTY_SET, set);
|
||||
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
|
||||
"chist" });
|
||||
fa.close();
|
||||
}
|
||||
|
||||
public void testElision() throws Exception {
|
||||
FrenchAnalyzer fa = new FrenchAnalyzer();
|
||||
assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouil" });
|
||||
fa.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -158,11 +162,14 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testStopwordsCasing() throws IOException {
|
||||
FrenchAnalyzer a = new FrenchAnalyzer();
|
||||
assertAnalyzesTo(a, "Votre", new String[] { });
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new FrenchAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer a = new FrenchAnalyzer();
|
||||
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test accent-insensitive */
|
||||
|
@ -170,5 +177,6 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
|
|||
Analyzer a = new FrenchAnalyzer();
|
||||
checkOneTerm(a, "sécuritaires", "securitair");
|
||||
checkOneTerm(a, "securitaires", "securitair");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.fr;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
|
|||
* Simple tests for {@link FrenchLightStemFilter}
|
||||
*/
|
||||
public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
|
||||
private Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer( MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
private Analyzer analyzer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer( MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** Test some examples from the paper */
|
||||
public void testExamples() throws IOException {
|
||||
|
@ -189,6 +200,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "chevaux", "chevaux");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
|
@ -205,5 +217,6 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.fr;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
@ -35,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
|
|||
* Simple tests for {@link FrenchMinimalStemFilter}
|
||||
*/
|
||||
public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
||||
private Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
|
||||
}
|
||||
};
|
||||
private Analyzer analyzer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** Test some examples from the paper */
|
||||
public void testExamples() throws IOException {
|
||||
|
@ -68,6 +79,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "chevaux", "chevaux");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** Test against a vocabulary from the reference impl */
|
||||
|
@ -89,5 +101,6 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new IrishAnalyzer();
|
||||
new IrishAnalyzer().close();
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
|
@ -38,6 +38,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(a, "síceapatacha", "síceapaite");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "le", new String[] { });
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test use of elisionfilter */
|
||||
|
@ -45,6 +46,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
|
|||
Analyzer a = new IrishAnalyzer();
|
||||
assertAnalyzesTo(a, "b'fhearr m'athair",
|
||||
new String[] { "fearr", "athair" });
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
|
@ -54,6 +56,7 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
|
|||
IrishAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTerm(a, "feirmeoireacht", "feirmeoireacht");
|
||||
checkOneTerm(a, "siopadóireacht", "siopadóir");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test special hyphen handling */
|
||||
|
@ -62,10 +65,13 @@ public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesTo(a, "n-athair",
|
||||
new String[] { "athair" },
|
||||
new int[] { 2 });
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new IrishAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer a = new IrishAnalyzer();
|
||||
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,5 +48,6 @@ public class TestIrishLowerCaseFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new GalicianAnalyzer();
|
||||
new GalicianAnalyzer().close();
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
|
@ -38,6 +38,7 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(a, "corresponderá", "correspond");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "e", new String[] {});
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
|
@ -47,10 +48,13 @@ public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
|
|||
GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTerm(a, "correspondente", "correspondente");
|
||||
checkOneTerm(a, "corresponderá", "correspond");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new GalicianAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer analyzer = new GalicianAnalyzer();
|
||||
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.gl;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
@ -33,13 +32,25 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
|||
* Simple tests for {@link GalicianMinimalStemmer}
|
||||
*/
|
||||
public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
private Analyzer a;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
a.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testPlural() throws Exception {
|
||||
checkOneTerm(a, "elefantes", "elefante");
|
||||
|
@ -64,6 +75,7 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "elefantes", "elefantes");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
|
@ -80,5 +92,6 @@ public class TestGalicianMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,29 +20,36 @@ package org.apache.lucene.analysis.gl;
|
|||
import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tests for {@link GalicianStemFilter}
|
||||
*/
|
||||
public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
|
||||
private Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new GalicianStemFilter(source));
|
||||
}
|
||||
};
|
||||
private Analyzer analyzer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new GalicianStemFilter(source));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** Test against a vocabulary from the reference impl */
|
||||
public void testVocabulary() throws IOException {
|
||||
|
@ -58,5 +65,6 @@ public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new HindiAnalyzer();
|
||||
new HindiAnalyzer().close();
|
||||
}
|
||||
|
||||
public void testBasics() throws Exception {
|
||||
|
@ -36,6 +36,7 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
|
|||
// two ways to write 'hindi' itself.
|
||||
checkOneTerm(a, "हिन्दी", "हिंद");
|
||||
checkOneTerm(a, "हिंदी", "हिंद");
|
||||
a.close();
|
||||
}
|
||||
|
||||
public void testExclusionSet() throws Exception {
|
||||
|
@ -43,10 +44,13 @@ public class TestHindiAnalyzer extends BaseTokenStreamTestCase {
|
|||
Analyzer a = new HindiAnalyzer(
|
||||
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTerm(a, "हिंदी", "हिंदी");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new HindiAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer analyzer = new HindiAnalyzer();
|
||||
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,5 +76,6 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,5 +95,6 @@ public class TestHindiStemmer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new HungarianAnalyzer();
|
||||
new HungarianAnalyzer().close();
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
|
@ -38,6 +38,7 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(a, "babakocsijáért", "babakocs");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "által", new String[] {});
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
|
@ -47,10 +48,13 @@ public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
|
|||
HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTerm(a, "babakocsi", "babakocsi");
|
||||
checkOneTerm(a, "babakocsijáért", "babakocs");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random(), new HungarianAnalyzer(), 1000*RANDOM_MULTIPLIER);
|
||||
Analyzer analyzer = new HungarianAnalyzer();
|
||||
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,13 +34,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
|
|||
* Simple tests for {@link HungarianLightStemFilter}
|
||||
*/
|
||||
public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
|
||||
private Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
private Analyzer analyzer;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
analyzer.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/** Test against a vocabulary from the reference impl */
|
||||
public void testVocabulary() throws IOException {
|
||||
|
@ -58,6 +70,7 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "babakocsi", "babakocsi");
|
||||
a.close();
|
||||
}
|
||||
|
||||
public void testEmptyTerm() throws IOException {
|
||||
|
@ -69,5 +82,6 @@ public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -87,6 +87,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -98,6 +99,7 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   public void testIgnoreCaseNoSideEffects() throws Exception {
@@ -118,5 +120,6 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "NoChAnGy", "NoChAnGy");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ArmenianAnalyzer();
+    new ArmenianAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "արծիվներ", "արծ");
     // stopword
     assertAnalyzesTo(a, "է", new String[] { });
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -47,10 +48,13 @@ public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
         ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "արծիվներ", "արծիվներ");
     checkOneTerm(a, "արծիվ", "արծ");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ArmenianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new ArmenianAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new IndonesianAnalyzer();
+    new IndonesianAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "pembunuhan", "bunuh");
     // stopword
     assertAnalyzesTo(a, "bahwa", new String[] {});
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -47,10 +48,13 @@ public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
         IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "peledakan", "peledakan");
     checkOneTerm(a, "pembunuhan", "bunuh");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new IndonesianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new IndonesianAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

@@ -18,26 +18,46 @@ package org.apache.lucene.analysis.id;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Tests {@link IndonesianStemmer}
  */
 public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
-  /* full stemming, no stopwords */
-  Analyzer a = new Analyzer() {
-    @Override
-    public TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
-      return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
-    }
-  };
+  private Analyzer a, b;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    /* full stemming, no stopwords */
+    a = new Analyzer() {
+      @Override
+      public TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
+        return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
+      }
+    };
+    /* inflectional-only stemming */
+    b = new Analyzer() {
+      @Override
+      public TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
+        return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    IOUtils.close(a, b);
+    super.tearDown();
+  }
 
   /** Some examples from the paper */
   public void testExamples() throws IOException {
@@ -111,15 +131,6 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "kecelakaan", "celaka");
   }
 
-  /* inflectional-only stemming */
-  Analyzer b = new Analyzer() {
-    @Override
-    public TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
-      return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false));
-    }
-  };
-
   /** Test stemming only inflectional suffixes */
   public void testInflectionalOnly() throws IOException {
     checkOneTerm(b, "bukunya", "buku");
@@ -143,5 +154,6 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }
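TestIndonesianStemmer's tearDown() closes two analyzers with a single IOUtils.close(a, b) instead of two close() statements, because that utility attempts every close and still surfaces a failure. An illustrative re-implementation of that contract, a sketch rather than Lucene's actual source (which also tolerates nulls and handles exception chaining differently):

import java.io.Closeable;
import java.io.IOException;

final class CloseAllSketch {
  static void closeAll(Closeable... closeables) throws IOException {
    IOException first = null;
    for (Closeable c : closeables) {
      try {
        if (c != null) {
          c.close();               // attempt every close, even after a failure
        }
      } catch (IOException e) {
        if (first == null) {
          first = e;               // remember only the first failure
        }
      }
    }
    if (first != null) {
      throw first;                 // surface it after all closes ran
    }
  }
}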
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.in;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -62,5 +61,6 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new ItalianAnalyzer();
+    new ItalianAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "abbandonati", "abbandonat");
     // stopword
     assertAnalyzesTo(a, "dallo", new String[] {});
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -47,11 +48,14 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
         ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "abbandonata", "abbandonata");
     checkOneTerm(a, "abbandonati", "abbandonat");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new ItalianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new ItalianAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 
   /** test that the elisionfilter is working */
@@ -59,5 +63,6 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
     Analyzer a = new ItalianAnalyzer();
     assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
     assertAnalyzesTo(a, "l'Italiano", new String[] { "italian" });
+    a.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.it;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -32,13 +31,25 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link ItalianLightStemFilter}
  */
 public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   /** Test against a vocabulary from the reference impl */
   public void testVocabulary() throws IOException {
@@ -59,5 +70,6 @@ public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -27,7 +27,7 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new LatvianAnalyzer();
+    new LatvianAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "tirgus", "tirg");
     // stopword
     assertAnalyzesTo(a, "un", new String[] {});
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -47,10 +48,13 @@ public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
         LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "tirgiem", "tirgiem");
     checkOneTerm(a, "tirgus", "tirg");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new LatvianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new LatvianAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.lv;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -30,13 +29,25 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
  * Basic tests for {@link LatvianStemmer}
  */
 public class TestLatvianStemmer extends BaseTokenStreamTestCase {
-  private Analyzer a = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
-    }
-  };
+  private Analyzer a;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    a.close();
+    super.tearDown();
+  }
 
   public void testNouns1() throws IOException {
     // decl. I
@@ -279,5 +290,6 @@ public class TestLatvianStemmer extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -1933,6 +1933,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -1945,5 +1946,6 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -137,6 +137,7 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
     };
 
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -148,6 +149,7 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   /**

@@ -47,6 +47,7 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   public void testRandomStrings() throws IOException {

@@ -18,7 +18,6 @@
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -78,6 +77,7 @@ public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
     };
 
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -89,5 +89,6 @@ public class TestHyphenatedWordsFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -66,5 +66,6 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
     };
 
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 }

@@ -18,12 +18,9 @@ package org.apache.lucene.analysis.miscellaneous;
  */
 
 import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
@@ -49,6 +46,7 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   /**

@@ -52,6 +52,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
 
       // equal to limit
       assertTokenStreamContents(a.tokenStream("dummy", "1  2  "), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null);
+      a.close();
     }
   }
 
@@ -86,6 +87,7 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
     assertEquals(0, reader.docFreq(t));
     reader.close();
     dir.close();
+    a.close();
   }
 }
 

@@ -56,6 +56,7 @@ public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase {
       // equal to limit
       assertTokenStreamContents(a.tokenStream("dummy", "1  2  "),
           new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
+      a.close();
     }
   }
 

@@ -15,6 +15,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Rethrow;
 
 /*
@@ -40,9 +41,11 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
 
     Map<String,Analyzer> analyzerPerField =
         Collections.<String,Analyzer>singletonMap("special", new SimpleAnalyzer());
 
+    Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
+
     PerFieldAnalyzerWrapper analyzer =
-        new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
+        new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField);
 
     try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
       CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
@@ -67,6 +70,10 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
       assertFalse(tokenStream.incrementToken());
       tokenStream.end();
     }
+    // TODO: fix this about PFAW, this is crazy
+    analyzer.close();
+    defaultAnalyzer.close();
+    IOUtils.close(analyzerPerField.values());
   }
 
   public void testReuseWrapped() throws Exception {
@@ -124,6 +131,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
     ts4 = wrapper3.tokenStream("moreSpecial", text);
     assertSame(ts3, ts4);
     assertSame(ts2, ts3);
+    IOUtils.close(wrapper3, wrapper2, wrapper1, specialAnalyzer, defaultAnalyzer);
   }
 
   public void testCharFilters() throws Exception {
@@ -152,5 +160,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
         new int[] { 0 },
         new int[] { 2 }
     );
+    p.close();
+    a.close(); // TODO: fix this about PFAW, its a trap
   }
 }
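The TODO comments above flag an ownership wrinkle: closing a PerFieldAnalyzerWrapper does not close the analyzers it delegates to, so the test has to close the wrapper, the default analyzer, and every per-field analyzer separately. A sketch of that triple cleanup under the same assumption; the types and the constructor are the ones the diff itself uses:

// Sketch only: the wrapper does not own its delegates, so all three
// layers are released explicitly in a finally block.
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.util.IOUtils;

public class WrapperCloseSketch {
  public static void main(String[] args) throws Exception {
    Analyzer defaultAnalyzer = new WhitespaceAnalyzer();
    Map<String,Analyzer> perField =
        Collections.<String,Analyzer>singletonMap("special", new SimpleAnalyzer());
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(defaultAnalyzer, perField);
    try {
      wrapper.tokenStream("special", "some text").close();
    } finally {
      wrapper.close();                  // releases only the wrapper itself
      defaultAnalyzer.close();          // the default delegate
      IOUtils.close(perField.values()); // and every per-field delegate
    }
  }
}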
@@ -163,6 +163,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
       };
 
       checkRandomData(random(), analyzer, 200);
+      analyzer.close();
     }
   }
 
@@ -175,6 +176,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
 }

@@ -24,19 +24,27 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 
-import java.io.Reader;
-
 public class TestScandinavianFoldingFilter extends BaseTokenStreamTestCase {
 
 
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String field) {
-      final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      final TokenStream stream = new ScandinavianFoldingFilter(tokenizer);
-      return new TokenStreamComponents(tokenizer, stream);
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String field) {
+        final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        final TokenStream stream = new ScandinavianFoldingFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, stream);
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   public void test() throws Exception {
 
@@ -117,6 +125,7 @@ public class TestScandinavianFoldingFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */

@@ -24,20 +24,27 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 
-import java.io.Reader;
-
 
 public class TestScandinavianNormalizationFilter extends BaseTokenStreamTestCase {
 
 
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String field) {
-      final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      final TokenStream stream = new ScandinavianNormalizationFilter(tokenizer);
-      return new TokenStreamComponents(tokenizer, stream);
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String field) {
+        final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        final TokenStream stream = new ScandinavianNormalizationFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, stream);
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   public void test() throws Exception {
 
@@ -116,6 +123,7 @@ public class TestScandinavianNormalizationFilter extends BaseTokenStreamTestCase
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -19,9 +19,10 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -31,6 +32,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.TestUtil;
 
 /**
@@ -79,7 +81,12 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
 
   public void testRandomRealisticWhiteSpace() throws IOException {
     Map<String,String> map = new HashMap<>();
+    Set<String> seen = new HashSet<>();
     int numTerms = atLeast(50);
+    boolean ignoreCase = random().nextBoolean();
+
+    CharacterUtils charUtils = CharacterUtils.getInstance();
+
     for (int i = 0; i < numTerms; i++) {
       String randomRealisticUnicodeString = TestUtil
           .randomRealisticUnicodeString(random());
@@ -93,16 +100,31 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
         j += Character.charCount(cp);
       }
       if (builder.length() > 0) {
-        String value = TestUtil.randomSimpleString(random());
-        map.put(builder.toString(),
-            value.isEmpty() ? "a" : value);
+
+        String inputValue = builder.toString();
+
+        // Make sure we don't try to add two inputs that vary only by case:
+        String seenInputValue;
+        if (ignoreCase) {
+          // TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)???
+          char[] buffer = inputValue.toCharArray();
+          charUtils.toLowerCase(buffer, 0, buffer.length);
+          seenInputValue = new String(buffer);
+        } else {
+          seenInputValue = inputValue;
+        }
+
+        if (seen.contains(seenInputValue) == false) {
+          seen.add(seenInputValue);
+          String value = TestUtil.randomSimpleString(random());
+          map.put(inputValue,
+              value.isEmpty() ? "a" : value);
+        }
       }
     }
     if (map.isEmpty()) {
       map.put("booked", "books");
     }
-    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
     Set<Entry<String,String>> entrySet = map.entrySet();
     StringBuilder input = new StringBuilder();
     List<String> output = new ArrayList<>();
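The testRandomRealisticWhiteSpace change guards the override map against two inputs that differ only by case when ignoreCase is on: each candidate key is case-folded before the seen check, and only unseen keys are added. Note that the folded key has to be materialized with new String(buffer); calling toString() on a char[] reports the array reference, not its contents, which would silently disable the check. A minimal sketch of the same dedup, using Locale.ROOT lower-casing (the very substitution the TODO in the hunk asks about) instead of CharacterUtils:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

final class CaseFoldingDedupSketch {
  static Map<String,String> buildMap(Iterable<String> inputs, boolean ignoreCase) {
    Map<String,String> map = new HashMap<>();
    Set<String> seen = new HashSet<>();
    for (String input : inputs) {
      // fold case only for the duplicate check; the map keeps the original key
      String key = ignoreCase ? input.toLowerCase(Locale.ROOT) : input;
      if (seen.add(key)) {          // add() returns false when the key was seen
        map.put(input, "stem");     // placeholder value for the sketch
      }
    }
    return map;
  }
}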
@@ -18,7 +18,6 @@
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
-import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -103,6 +102,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+    a.close();
   }
 
   public void testEmptyTerm() throws IOException {
@@ -114,5 +114,6 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.IOUtils;
 import org.junit.Test;
 
 import java.io.IOException;
@@ -292,6 +293,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 4, 4, 11 },
         new int[] { 10, 15, 15 },
         new int[] { 2, 0, 1 });
+    IOUtils.close(a, a2, a3);
   }
 
   /** concat numbers + words + all */
@@ -312,6 +314,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 0, 0, 0, 4, 8, 8, 12 },
         new int[] { 3, 7, 15, 7, 11, 15, 15 },
         new int[] { 1, 0, 0, 1, 1, 0, 1 });
+    a.close();
   }
 
   /** concat numbers + words + all + preserve original */
@@ -332,6 +335,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 0, 0, 0, 0, 4, 8, 8, 12 },
         new int[] { 15, 3, 7, 15, 7, 11, 15, 15 },
         new int[] { 1, 0, 0, 0, 1, 1, 0, 1 });
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -356,6 +360,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
       };
       // TODO: properly support positionLengthAttribute
       checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20, false, false);
+      a.close();
     }
   }
 
@@ -381,6 +386,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
       };
       // TODO: properly support positionLengthAttribute
      checkRandomData(random(), a, 20*RANDOM_MULTIPLIER, 8192, false, false);
+      a.close();
     }
   }
 
@@ -404,6 +410,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
       };
       // depending upon options, this thing may or may not preserve the empty term
      checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+      a.close();
     }
   }
 }

@@ -35,7 +35,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
 
 /**
  * Tests {@link EdgeNGramTokenFilter} for correctness.
@@ -183,6 +182,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
        }
      };
      checkRandomData(random(), a, 100*RANDOM_MULTIPLIER);
+     a.close();
    }
  }
 
@@ -197,6 +197,7 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
       }
     };
     checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+    a.close();
   }
 
   public void testGraphs() throws IOException {

@@ -113,6 +113,7 @@ public class EdgeNGramTokenizerTest extends BaseTokenStreamTestCase {
      };
      checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 20);
      checkRandomData(random(), a, 10*RANDOM_MULTIPLIER, 8192);
+     a.close();
    }
  }
 

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -140,6 +139,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
         new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
         new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
         new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+    analyzer.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -156,6 +156,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
        }
      };
      checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
+     a.close();
    }
  }
 
@@ -170,6 +171,7 @@ public class NGramTokenFilterTest extends BaseTokenStreamTestCase {
       }
     };
     checkAnalysisConsistency(random, a, random.nextBoolean(), "");
+    a.close();
   }
 
   public void testSupplementaryCharacters() throws IOException {

@@ -123,6 +123,7 @@ public class NGramTokenizerTest extends BaseTokenStreamTestCase {
      };
      checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
      checkRandomData(random(), a, 10*RANDOM_MULTIPLIER, 1027);
+     a.close();
    }
  }
 

@@ -117,6 +117,7 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "opheffen", "opheff");
     checkOneTerm(a, "opheffende", "opheff");
     checkOneTerm(a, "opheffing", "opheff");
+    a.close();
   }
 
   public void testReusableTokenStream() throws Exception {
@@ -125,6 +126,7 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "lichamelijk", "licham");
     checkOneTerm(a, "lichamelijke", "licham");
     checkOneTerm(a, "lichamelijkheden", "licham");
+    a.close();
   }
 
   public void testExclusionTableViaCtor() throws IOException {
@@ -132,10 +134,11 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
     set.add("lichamelijk");
     DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
-
+    a.close();
+
     a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
-
+    a.close();
   }
 
   /**
@@ -145,12 +148,14 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
   public void testStemOverrides() throws IOException {
     DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET);
     checkOneTerm(a, "fiets", "fiets");
+    a.close();
   }
 
   public void testEmptyStemDictionary() throws IOException {
     DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET,
         CharArraySet.EMPTY_SET, CharArrayMap.<String>emptyMap());
     checkOneTerm(a, "fiets", "fiet");
+    a.close();
   }
 
   /**
@@ -159,15 +164,20 @@ public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
   public void testStopwordsCasing() throws IOException {
     DutchAnalyzer a = new DutchAnalyzer();
     assertAnalyzesTo(a, "Zelf", new String[] { });
+    a.close();
   }
 
   private void check(final String input, final String expected) throws Exception {
-    checkOneTerm(new DutchAnalyzer(), input, expected);
+    Analyzer analyzer = new DutchAnalyzer();
+    checkOneTerm(analyzer, input, expected);
+    analyzer.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new DutchAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new DutchAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 
 }
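TestDutchAnalyzer shows the last variant of the refactor: helpers like check() that used to build an analyzer inline now bind it to a local so it can be closed after the assertion. A hypothetical helper (not in the patch) that captures the pattern once:

import org.apache.lucene.analysis.Analyzer;

final class WithAnalyzerSketch {
  interface AnalyzerCheck {
    void run(Analyzer analyzer) throws Exception;
  }

  static void withAnalyzer(Analyzer analyzer, AnalyzerCheck check) throws Exception {
    try {
      check.run(analyzer);           // run the assertion against the analyzer
    } finally {
      analyzer.close();              // always release, even if the check fails
    }
  }
}

With the Java 7 toolchain of this era a caller would pass an anonymous AnalyzerCheck; the point is only that close() lives in finally, so a failing assertion cannot leak the analyzer.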
@@ -27,7 +27,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the
    * stopwords file is missing in classpath */
   public void testResourcesAvailable() {
-    new NorwegianAnalyzer();
+    new NorwegianAnalyzer().close();
   }
 
   /** test stopwords and stemming */
@@ -38,6 +38,7 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
     checkOneTerm(a, "havnedistrikter", "havnedistrikt");
     // stopword
     assertAnalyzesTo(a, "det", new String[] {});
+    a.close();
   }
 
   /** test use of exclusion set */
@@ -47,10 +48,13 @@ public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
         NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTerm(a, "havnedistriktene", "havnedistriktene");
     checkOneTerm(a, "havnedistrikter", "havnedistrikt");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new NorwegianAnalyzer(), 1000*RANDOM_MULTIPLIER);
+    Analyzer analyzer = new NorwegianAnalyzer();
+    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+    analyzer.close();
   }
 }

@@ -17,9 +17,7 @@ package org.apache.lucene.analysis.no;
  * limitations under the License.
  */
 
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.Reader;
 import java.nio.file.Files;
 import java.util.Random;
 
@@ -36,18 +34,29 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
 import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
 
 
 /**
  * Simple tests for {@link NorwegianLightStemFilter}
  */
 public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, BOKMAAL));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, BOKMAAL));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   /** Test against a vocabulary file */
   public void testVocabulary() throws IOException {
@@ -64,6 +73,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_light.txt")));
+    analyzer.close();
   }
 
   public void testKeyword() throws IOException {
@@ -77,6 +87,7 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "sekretæren", "sekretæren");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -94,5 +105,6 @@ public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -17,9 +17,7 @@ package org.apache.lucene.analysis.no;
  * limitations under the License.
  */
 
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.Reader;
 import java.nio.file.Files;
 import java.util.Random;
 
@@ -40,13 +38,25 @@ import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
  * Simple tests for {@link NorwegianMinimalStemFilter}
  */
 public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new Analyzer() {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-      return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, BOKMAAL));
-    }
-  };
+  private Analyzer analyzer;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, BOKMAAL));
+      }
+    };
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    analyzer.close();
+    super.tearDown();
+  }
 
   /** Test against a Bokmål vocabulary file */
   public void testVocabulary() throws IOException {
@@ -63,6 +73,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_minimal.txt")));
+    analyzer.close();
   }
 
   public void testKeyword() throws IOException {
@@ -76,6 +87,7 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "sekretæren", "sekretæren");
+    a.close();
   }
 
   /** blast some random strings through the analyzer */
@@ -93,5 +105,6 @@ public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
       }
     };
     checkOneTerm(a, "", "");
+    a.close();
   }
 }

@@ -227,6 +227,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     // TODO: properly support positionLengthAttribute
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
+    a.close();
   }
 
   /** blast some random large strings through the analyzer */
@@ -241,5 +242,6 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     // TODO: properly support positionLengthAttribute
     checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false);
+    a.close();
   }
 }

@@ -17,14 +17,12 @@ package org.apache.lucene.analysis.path;
  * limitations under the License.
 */
 
-import java.io.Reader;
 import java.io.StringReader;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 
 import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_DELIMITER;
 import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_SKIP;
@@ -187,6 +185,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     // TODO: properly support positionLengthAttribute
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
+    a.close();
   }
 
   /** blast some random large strings through the analyzer */
@@ -201,5 +200,6 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
     };
     // TODO: properly support positionLengthAttribute
     checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false);
+    a.close();
   }
 }

@@ -16,7 +16,7 @@ package org.apache.lucene.analysis.pattern;
  * See the License for the specific language governing permissions and
  * limitations under the License.
 */
-import java.io.Reader;
+
 import java.io.StringReader;
 import java.util.regex.Pattern;
 
@@ -606,6 +606,7 @@ public class TestPatternCaptureGroupTokenFilter extends BaseTokenStreamTestCase
     };
 
     checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+    a.close();
   }
 
   private void testPatterns(String input, String[] regexes, String[] tokens,
Some files were not shown because too many files have changed in this diff.