LUCENE-3397: Cleaned up test TokenStream reusability

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1161488 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher John Male 2011-08-25 10:42:29 +00:00
parent 1057d24e7f
commit 0f443840c4
17 changed files with 219 additions and 124 deletions

View File

@ -31,14 +31,7 @@ import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -1842,18 +1835,18 @@ final class SynonymAnalyzer extends Analyzer {
*
*/
final class SynonymTokenizer extends TokenStream {
private TokenStream realStream;
private final TokenStream realStream;
private Token currentRealToken = null;
private Map<String,String> synonyms;
StringTokenizer st = null;
private CharTermAttribute realTermAtt;
private PositionIncrementAttribute realPosIncrAtt;
private OffsetAttribute realOffsetAtt;
private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private OffsetAttribute offsetAtt;
private final Map<String, String> synonyms;
private StringTokenizer st = null;
private final CharTermAttribute realTermAtt;
private final PositionIncrementAttribute realPosIncrAtt;
private final OffsetAttribute realOffsetAtt;
private final CharTermAttribute termAtt;
private final PositionIncrementAttribute posIncrAtt;
private final OffsetAttribute offsetAtt;
public SynonymTokenizer(TokenStream realStream, Map<String,String> synonyms) {
public SynonymTokenizer(TokenStream realStream, Map<String, String> synonyms) {
this.realStream = realStream;
this.synonyms = synonyms;
realTermAtt = realStream.addAttribute(CharTermAttribute.class);
@ -1905,6 +1898,13 @@ final class SynonymTokenizer extends TokenStream {
}
@Override
public void reset() throws IOException {
super.reset();
this.currentRealToken = null;
this.st = null;
}
static abstract class TestHighlightRunner {
static final int QUERY = 0;
static final int QUERY_TERM = 1;

View File

@ -149,7 +149,7 @@ public class TestDocumentWriter extends LuceneTestCase {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
boolean first=true;
boolean first = true;
AttributeSource.State state;
@Override
@ -180,9 +180,16 @@ public class TestDocumentWriter extends LuceneTestCase {
}
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@Override
public void reset() throws IOException {
super.reset();
first = true;
state = null;
}
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
};
}
};

View File

@ -1660,21 +1660,15 @@ public class TestIndexWriter extends LuceneTestCase {
}
private static class StringSplitTokenizer extends Tokenizer {
private final String[] tokens;
private int upto = 0;
private String[] tokens;
private int upto;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public StringSplitTokenizer(Reader r) {
try {
final StringBuilder b = new StringBuilder();
final char[] buffer = new char[1024];
int n;
while((n = r.read(buffer)) != -1) {
b.append(buffer, 0, n);
}
tokens = b.toString().split(" ");
} catch (IOException ioe) {
throw new RuntimeException(ioe);
reset(r);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@ -1690,6 +1684,18 @@ public class TestIndexWriter extends LuceneTestCase {
return false;
}
}
@Override
public void reset(Reader input) throws IOException {
this.upto = 0;
final StringBuilder b = new StringBuilder();
final char[] buffer = new char[1024];
int n;
while ((n = input.read(buffer)) != -1) {
b.append(buffer, 0, n);
}
this.tokens = b.toString().split(" ");
}
}
/**

View File

@ -457,7 +457,7 @@ public class TestPayloads extends LuceneTestCase {
private byte[] data;
private int length;
private int offset;
Payload payload = new Payload();
private int startOffset;
PayloadAttribute payloadAtt;
public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
@ -465,6 +465,7 @@ public class TestPayloads extends LuceneTestCase {
this.data = data;
this.length = length;
this.offset = offset;
this.startOffset = offset;
payloadAtt = addAttribute(PayloadAttribute.class);
}
@ -484,6 +485,12 @@ public class TestPayloads extends LuceneTestCase {
return hasNext;
}
@Override
public void reset() throws IOException {
super.reset();
this.offset = startOffset;
}
}
public void testThreadSafety() throws Exception {

View File

@ -21,7 +21,9 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -35,7 +37,6 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
/**
* Attempt to reproduce an assertion error that happens
* only with the trunk version around April 2011.
* @param args
*/
public void test() throws Exception {
Directory dir = newDirectory();
@ -72,16 +73,16 @@ final class BugReproAnalyzer extends Analyzer{
}
}
final class BugReproAnalyzerTokenizer extends TokenStream {
final class BugReproAnalyzerTokenizer extends Tokenizer {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
int tokenCount = 4;
int nextTokenIndex = 0;
String terms[] = new String[]{"six", "six", "drunken", "drunken"};
int starts[] = new int[]{0, 0, 4, 4};
int ends[] = new int[]{3, 3, 11, 11};
int incs[] = new int[]{1, 0, 1, 0};
private final int tokenCount = 4;
private int nextTokenIndex = 0;
private final String terms[] = new String[]{"six", "six", "drunken", "drunken"};
private final int starts[] = new int[]{0, 0, 4, 4};
private final int ends[] = new int[]{3, 3, 11, 11};
private final int incs[] = new int[]{1, 0, 1, 0};
@Override
public boolean incrementToken() throws IOException {
@ -95,4 +96,10 @@ final class BugReproAnalyzerTokenizer extends TokenStream {
return false;
}
}
@Override
public void reset() throws IOException {
super.reset();
this.nextTokenIndex = 0;
}
}

View File

@ -24,8 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -131,11 +130,11 @@ public class TestTermVectorsReader extends LuceneTestCase {
}
private class MyTokenStream extends TokenStream {
int tokenUpto;
private int tokenUpto;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
OffsetAttribute offsetAtt;
private final CharTermAttribute termAtt;
private final PositionIncrementAttribute posIncrAtt;
private final OffsetAttribute offsetAtt;
public MyTokenStream() {
termAtt = addAttribute(CharTermAttribute.class);
@ -160,6 +159,12 @@ public class TestTermVectorsReader extends LuceneTestCase {
return true;
}
}
@Override
public void reset() throws IOException {
super.reset();
this.tokenUpto = 0;
}
}
private class MyAnalyzer extends Analyzer {

View File

@ -22,7 +22,9 @@ import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -31,13 +33,20 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
class RepeatingTokenStream extends TokenStream {
public int num;
class RepeatingTokenStream extends Tokenizer {
private final Random random;
private final float percentDocs;
private final int maxTF;
private int num;
CharTermAttribute termAtt;
String value;
public RepeatingTokenStream(String val) {
public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
this.value = val;
this.random = random;
this.percentDocs = percentDocs;
this.maxTF = maxTF;
this.termAtt = addAttribute(CharTermAttribute.class);
}
@ -51,19 +60,27 @@ class RepeatingTokenStream extends TokenStream {
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
if (random.nextFloat() < percentDocs) {
num = random.nextInt(maxTF) + 1;
} else {
num = 0;
}
}
}
public class TestTermdocPerf extends LuceneTestCase {
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
final RepeatingTokenStream ts = new RepeatingTokenStream(val);
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
Analyzer analyzer = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (random.nextFloat() < percentDocs) ts.num = random.nextInt(maxTF)+1;
else ts.num=0;
return ts;
}
};

View File

@ -378,6 +378,13 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
return false;
}
}
@Override
public void reset() throws IOException {
super.reset();
this.upto = 0;
this.lastPos = 0;
}
}
public void testZeroPosIncr() throws IOException {

View File

@ -17,15 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -82,6 +79,12 @@ public class TestPositionIncrement extends LuceneTestCase {
i++;
return true;
}
@Override
public void reset() throws IOException {
super.reset();
this.i = 0;
}
};
}
};

View File

@ -57,7 +57,6 @@ public class PayloadHelper {
public final class PayloadAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
@ -67,9 +66,9 @@ public class PayloadHelper {
}
public final class PayloadFilter extends TokenFilter {
String fieldName;
int numSeen = 0;
PayloadAttribute payloadAtt;
private final String fieldName;
private int numSeen = 0;
private final PayloadAttribute payloadAtt;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
@ -81,18 +80,13 @@ public class PayloadHelper {
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (fieldName.equals(FIELD))
{
if (fieldName.equals(FIELD)) {
payloadAtt.setPayload(new Payload(payloadField));
}
else if (fieldName.equals(MULTI_FIELD))
{
if (numSeen % 2 == 0)
{
} else if (fieldName.equals(MULTI_FIELD)) {
if (numSeen % 2 == 0) {
payloadAtt.setPayload(new Payload(payloadMultiField1));
}
else
{
else {
payloadAtt.setPayload(new Payload(payloadMultiField2));
}
numSeen++;
@ -101,6 +95,12 @@ public class PayloadHelper {
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
this.numSeen = 0;
}
}
/**

View File

@ -18,10 +18,7 @@ package org.apache.lucene.search.payloads;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -68,9 +65,9 @@ public class TestPayloadNearQuery extends LuceneTestCase {
}
private static class PayloadFilter extends TokenFilter {
String fieldName;
int numSeen = 0;
protected PayloadAttribute payAtt;
private final String fieldName;
private int numSeen = 0;
private final PayloadAttribute payAtt;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
@ -81,7 +78,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
@Override
public boolean incrementToken() throws IOException {
boolean result = false;
if (input.incrementToken() == true){
if (input.incrementToken()) {
if (numSeen % 2 == 0) {
payAtt.setPayload(new Payload(payload2));
} else {
@ -92,6 +89,12 @@ public class TestPayloadNearQuery extends LuceneTestCase {
}
return result;
}
@Override
public void reset() throws IOException {
super.reset();
this.numSeen = 0;
}
}
private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {

View File

@ -16,6 +16,7 @@ package org.apache.lucene.search.payloads;
* limitations under the License.
*/
import org.apache.lucene.analysis.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.English;
@ -33,10 +34,6 @@ import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
@ -78,10 +75,10 @@ public class TestPayloadTermQuery extends LuceneTestCase {
}
private static class PayloadFilter extends TokenFilter {
String fieldName;
int numSeen = 0;
private final String fieldName;
private int numSeen = 0;
PayloadAttribute payloadAtt;
private final PayloadAttribute payloadAtt;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
@ -108,6 +105,12 @@ public class TestPayloadTermQuery extends LuceneTestCase {
return false;
}
}
@Override
public void reset() throws IOException {
super.reset();
this.numSeen = 0;
}
}
@BeforeClass

View File

@ -17,18 +17,15 @@ package org.apache.lucene.queryparser.classic;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
@ -144,10 +141,10 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
private int prevStartOffset;
private int prevEndOffset;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
OffsetAttribute offsetAtt;
TypeAttribute typeAtt;
private final CharTermAttribute termAtt;
private final PositionIncrementAttribute posIncrAtt;
private final OffsetAttribute offsetAtt;
private final TypeAttribute typeAtt;
public TestFilter(TokenStream in) {
super(in);
@ -168,7 +165,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
return true;
} else {
boolean next = input.incrementToken();
if (next == false) {
if (!next) {
return false;
}
prevType = typeAtt.type();
@ -186,6 +183,13 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
}
}
}
public void reset() throws IOException {
super.reset();
this.prevType = null;
this.prevStartOffset = 0;
this.prevEndOffset = 0;
}
}
/**

View File

@ -79,6 +79,13 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
return false;
}
}
@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
this.upto = 0;
this.lastPos = 0;
}
}
public void testMultiPhraseQueryParsing() throws Exception {

View File

@ -26,12 +26,7 @@ import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateTools;
@ -78,13 +73,14 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
super(in);
}
boolean inPhrase = false;
private boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;
private int savedStart = 0;
private int savedEnd = 0;
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() throws IOException {
@ -106,6 +102,14 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
return true;
return false;
}
@Override
public void reset() throws IOException {
super.reset();
this.inPhrase = false;
this.savedStart = 0;
this.savedEnd = 0;
}
}
public static final class QPTestAnalyzer extends Analyzer {

View File

@ -17,12 +17,10 @@ package org.apache.lucene.queryparser.flexible.standard;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -184,7 +182,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
return true;
} else {
boolean next = input.incrementToken();
if (next == false) {
if (!next) {
return false;
}
prevType = typeAtt.type();
@ -203,6 +201,13 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
}
}
@Override
public void reset() throws IOException {
super.reset();
this.prevType = null;
this.prevStartOffset = 0;
this.prevEndOffset = 0;
}
}
/**

View File

@ -28,13 +28,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -98,8 +92,9 @@ public class TestQPHelper extends LuceneTestCase {
super(in);
}
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;
private boolean inPhrase = false;
private int savedStart = 0;
private int savedEnd = 0;
@Override
public boolean incrementToken() throws IOException {
@ -123,6 +118,14 @@ public class TestQPHelper extends LuceneTestCase {
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
this.inPhrase = false;
this.savedStart = 0;
this.savedEnd = 0;
}
}
public static final class QPTestAnalyzer extends Analyzer {
@ -1203,10 +1206,11 @@ public class TestQPHelper extends LuceneTestCase {
super.tearDown();
}
private class CannedTokenStream extends TokenStream {
private class CannedTokenStream extends Tokenizer {
private int upto = 0;
final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
final CharTermAttribute term = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
private final CharTermAttribute term = addAttribute(CharTermAttribute.class);
@Override
public boolean incrementToken() {
clearAttributes();
@ -1229,6 +1233,12 @@ public class TestQPHelper extends LuceneTestCase {
upto++;
return true;
}
@Override
public void reset() throws IOException {
super.reset();
this.upto = 0;
}
}
private class CannedAnalyzer extends Analyzer {