LUCENE-2413: convert over more tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@944925 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2010-05-16 22:21:48 +00:00
parent e292af7b12
commit 4092c03de7
10 changed files with 57 additions and 46 deletions
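
All ten files apply the same LUCENE-2413 pattern: real analysis chains (SimpleAnalyzer, StopAnalyzer, StandardAnalyzer, WhitespaceTokenizer, and the ad-hoc StopWhitespaceAnalyzer) are swapped for the test-only MockAnalyzer/MockTokenizer, so these unit tests no longer exercise production analyzers. A minimal sketch of the recurring substitutions, assembled only from constructors that appear in the hunks below (statement fragments, not a compilable class; the lowercasing boolean is inferred from the TestSynonymTokenFilter hunk, where MockTokenizer(reader, MockTokenizer.WHITESPACE, true) stands in for WhitespaceTokenizer plus LowerCaseFilter):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.analysis.MockTokenFilter;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    // WhitespaceTokenizer / WhitespaceAnalyzer -> whitespace tokenization, no lowercasing
    Analyzer whitespace = new MockAnalyzer(MockTokenizer.WHITESPACE, false);

    // StopAnalyzer(TEST_VERSION_CURRENT) -> letter tokenization, lowercasing,
    // the built-in English stopword set, position increments enabled
    Analyzer stop = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);

    // StopFilter.makeStopSet(..., "the", "foo") -> a custom stop set expressed as an automaton
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    Analyzer custom = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true);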

MemoryIndexTest.java

@@ -28,6 +28,9 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -143,9 +146,9 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
    */
   private Analyzer randomAnalyzer() {
     switch(random.nextInt(3)) {
-      case 0: return new SimpleAnalyzer(TEST_VERSION_CURRENT);
-      case 1: return new StopAnalyzer(TEST_VERSION_CURRENT);
-      default: return new StandardAnalyzer(TEST_VERSION_CURRENT);
+      case 0: return new MockAnalyzer(MockTokenizer.SIMPLE, true);
+      case 1: return new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+      default: return new MockAnalyzer(MockTokenizer.WHITESPACE, false);
     }
   }

TestQPHelper.java

@@ -35,6 +35,7 @@ import java.util.Collections;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
@@ -55,6 +56,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.messages.MessageImpl;
+import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.core.QueryNodeException;
 import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
@@ -79,6 +81,8 @@ import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;

 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -1074,8 +1078,8 @@ public class TestQPHelper extends LocalizedTestCase {

   public void testStopwords() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
-    qp.setAnalyzer(
-        new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo" )));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    qp.setAnalyzer(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));

     Query result = qp.parse("a:the OR a:foo", "a");
     assertNotNull("result is null and it shouldn't be", result);
@@ -1098,7 +1102,7 @@ public class TestQPHelper extends LocalizedTestCase {
   public void testPositionIncrement() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this" )));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);

TestQueryParserWrapper.java

@@ -33,6 +33,7 @@ import java.util.Collections;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
@@ -73,6 +74,8 @@ import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;

 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -1059,7 +1062,8 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
   }

   public void testStopwords() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo")));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    QueryParserWrapper qp = new QueryParserWrapper("a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -1078,7 +1082,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
   }

   public void testPositionIncrement() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0        2                      5       7  8

TestSynonymTokenFilter.java

@@ -24,6 +24,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
@@ -93,8 +94,7 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-      ts = new LowerCaseFilter(TEST_VERSION_CURRENT, ts);
+      TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
       ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
       return ts;
     }
@@ -110,9 +110,8 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
       SavedStreams streams = (SavedStreams) getPreviousTokenStream();
       if (streams == null) {
         streams = new SavedStreams();
-        streams.source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-        streams.result = new LowerCaseFilter(TEST_VERSION_CURRENT, streams.source);
-        streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
+        streams.source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+        streams.result = new SynonymTokenFilter(streams.source, synonyms, maxSynonyms);
         setPreviousTokenStream(streams);
       } else {
         streams.source.reset(reader);

TestDocumentWriter.java

@@ -22,6 +22,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -108,7 +109,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       }

       @Override
@@ -141,7 +142,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new TokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)) {
+        return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
           boolean first=true;
           AttributeSource.State state;

TestIndexWriter.java

@@ -38,6 +38,8 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -1868,7 +1870,7 @@ public class TestIndexWriter extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
@@ -1951,7 +1953,7 @@ public class TestIndexWriter extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
@@ -3098,7 +3100,7 @@ public class TestIndexWriter extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
@@ -4185,7 +4187,7 @@ public class TestIndexWriter extends LuceneTestCase {
   public void testEndOffsetPositionStopFilter() throws Exception {
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StopAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
     Document doc = new Document();
     Field f = new Field("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     doc.add(f);
@@ -4486,23 +4488,23 @@ public class TestIndexWriter extends LuceneTestCase {
     Document doc = new Document();
     Field f = new Field("binary", b, 10, 17);
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field1")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc1field1"), MockTokenizer.WHITESPACE, false));
     Field f2 = new Field("string", "value", Field.Store.YES,Field.Index.ANALYZED);
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field2")));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc1field2"), MockTokenizer.WHITESPACE, false));
     doc.add(f);
     doc.add(f2);
     w.addDocument(doc);
     // add 2 docs to test in-memory merging
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field1")));
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field2")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false));
     w.addDocument(doc);
     // force segment flush so we can force a segment merge with doc3 later.
     w.commit();
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field1")));
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field2")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc3field1"), MockTokenizer.WHITESPACE, false));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc3field2"), MockTokenizer.WHITESPACE, false));
     w.addDocument(doc);
     w.commit();

TestPayloads.java

@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -407,7 +408,7 @@ public class TestPayloads extends LuceneTestCase {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
         PayloadData payload = fieldToData.get(fieldName);
-        TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
         if (payload != null) {
           if (payload.numFieldInstancesToSkip == 0) {
             ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);

TestQueryParser.java

@@ -32,6 +32,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.StopFilter;
@@ -65,6 +66,8 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;

 /**
  * Tests QueryParser.
@@ -957,7 +960,8 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   public void testStopwords() throws Exception {
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo")));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -973,7 +977,7 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   public void testPositionIncrement() throws Exception {
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0        2                      5       7  8

TestPhraseQuery.java

@@ -53,7 +53,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       }

       @Override
@@ -207,7 +207,7 @@ public class TestPhraseQuery extends LuceneTestCase {
   public void testPhraseQueryWithStopAnalyzer() throws Exception {
     RAMDirectory directory = new RAMDirectory();
-    StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
+    Analyzer stopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
         Version.LUCENE_24, stopAnalyzer));
     Document doc = new Document();
@@ -360,7 +360,7 @@ public class TestPhraseQuery extends LuceneTestCase {
   }

   public void testToString() throws Exception {
-    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
     qp.setEnablePositionIncrements(true);
     PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");

TestPositionIncrement.java

@@ -54,6 +54,9 @@ import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;

 /**
  * Term position unit test.
@@ -196,7 +199,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     // should not find "1 2" because there is a gap of 1 in the index
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                                     new StopWhitespaceAnalyzer(false));
+                                     new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, false));
     q = (PhraseQuery) qp.parse("\"1 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
@@ -220,26 +223,16 @@ public class TestPositionIncrement extends LuceneTestCase {
     // when both qp qnd stopFilter propagate increments, we should find the doc.
     qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                         new StopWhitespaceAnalyzer(true));
+                         new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, true));
     qp.setEnablePositionIncrements(true);
     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
   }

-  private static class StopWhitespaceAnalyzer extends Analyzer {
-    boolean enablePositionIncrements;
-    final MockAnalyzer a = new MockAnalyzer();
-    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
-      this.enablePositionIncrements = enablePositionIncrements;
-    }
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = a.tokenStream(fieldName,reader);
-      return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
-          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
-    }
-  }
+  // stoplist that accepts case-insensitive "stop"
+  private static final CharacterRunAutomaton stopStopList =
+    new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());

   public void testPayloadsPos0() throws Exception {
     Directory dir = new MockRAMDirectory();