LUCENE-3397: Cleaned up remaining test TSs and PatternAnalyzer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1161986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher John Male 2011-08-26 04:16:19 +00:00
parent a555a34924
commit 0f2d7ad556
4 changed files with 52 additions and 12 deletions

View File

@ -311,6 +311,12 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
tokens[p++].copyTo(reusableToken);
return true;
}
/**
 * Resets this stream for another pass: delegates to the superclass and
 * moves the read index {@code p} back to the first entry of {@code tokens}.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
  super.reset();
  p = 0; // next incrementToken() starts again at tokens[0]
}
};
return ts;
}

View File

@ -447,6 +447,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
return input.incrementToken();
}
/**
 * Resets this filter: delegates to the superclass, then clears the
 * {@code count} field back to zero.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
  super.reset();
  count = 0;
}
};
}

View File

@ -492,7 +492,6 @@ public class TestPayloadSpans extends LuceneTestCase {
final class PayloadFilter extends TokenFilter {
String fieldName;
int numSeen = 0;
Set<String> entities = new HashSet<String>();
Set<String> nopayload = new HashSet<String>();
int pos;
@ -530,6 +529,12 @@ public class TestPayloadSpans extends LuceneTestCase {
}
return false;
}
/**
 * Resets this filter: delegates to the superclass, then sets the
 * {@code pos} field back to zero.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
  super.reset();
  pos = 0;
}
}
public final class TestPayloadAnalyzer extends Analyzer {

View File

@ -28,6 +28,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -220,10 +221,6 @@ public final class PatternAnalyzer extends Analyzer {
*/
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (reader instanceof FastStringReader) { // fast path
return tokenStream(fieldName, ((FastStringReader)reader).getString());
}
try {
String text = toString(reader);
return tokenStream(fieldName, text);
@ -290,6 +287,10 @@ public final class PatternAnalyzer extends Analyzer {
* @throws IOException if an I/O error occurs while reading the stream
*/
private static String toString(Reader input) throws IOException {
if (input instanceof FastStringReader) { // fast path
return ((FastStringReader) input).getString();
}
try {
int len = 256;
char[] buffer = new char[len];
@ -324,9 +325,9 @@ public final class PatternAnalyzer extends Analyzer {
* The work horse; performance isn't fantastic, but it's not nearly as bad
* as one might think - kudos to the Sun regex developers.
*/
private static final class PatternTokenizer extends TokenStream {
private static final class PatternTokenizer extends Tokenizer {
private final String str;
private String str;
private final boolean toLowerCase;
private Matcher matcher;
private int pos = 0;
@ -373,6 +374,18 @@ public final class PatternAnalyzer extends Analyzer {
final int finalOffset = str.length();
this.offsetAtt.setOffset(finalOffset, finalOffset);
}
/**
 * Points this tokenizer at a new reader: hands the reader to the
 * superclass, then replaces {@code str} with the reader's text as produced
 * by {@link PatternAnalyzer#toString(Reader)}.
 *
 * @param input the reader supplying the new text
 * @throws IOException if the superclass reset or the text conversion fails
 */
@Override
public void reset(Reader input) throws IOException {
  super.reset(input);
  // NOTE(review): the matcher field is not touched here; confirm it is
  // rebuilt for the new string before tokens are produced again.
  str = PatternAnalyzer.toString(input);
}
/**
 * Resets this tokenizer: delegates to the superclass, then sets the
 * {@code pos} field back to zero.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
  super.reset();
  // NOTE(review): only pos is rewound; the matcher field is left as-is —
  // confirm that is intended.
  pos = 0;
}
}
@ -383,9 +396,9 @@ public final class PatternAnalyzer extends Analyzer {
* Special-case class for best performance in common cases; this class is
* otherwise unnecessary.
*/
private static final class FastStringTokenizer extends TokenStream {
private static final class FastStringTokenizer extends Tokenizer {
private final String str;
private String str;
private int pos;
private final boolean isLetter;
private final boolean toLowerCase;
@ -465,6 +478,16 @@ public final class PatternAnalyzer extends Analyzer {
return stopWords != null && stopWords.contains(text);
}
/**
 * Points this tokenizer at a new reader: hands the reader to the
 * superclass, then replaces {@code str} with the reader's text as produced
 * by {@link PatternAnalyzer#toString(Reader)}.
 *
 * @param input the reader supplying the new text
 * @throws IOException if the superclass reset or the text conversion fails
 */
@Override
public void reset(Reader input) throws IOException {
  // Delegate first so the base Tokenizer also sees the new reader; this
  // mirrors PatternTokenizer.reset(Reader) in the same file.
  super.reset(input);
  this.str = PatternAnalyzer.toString(input);
}
/**
 * Resets this tokenizer: delegates to the superclass, then sets the
 * {@code pos} field back to zero.
 *
 * @throws IOException if the superclass reset fails
 */
@Override
public void reset() throws IOException {
  super.reset();
  pos = 0;
}
}