throw a best-effort NPE from the jflex-based tokenizers if you don't consume the TS correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401449 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-23 20:32:27 +00:00
parent 9caa4a68bd
commit e670e32471
6 changed files with 20 additions and 10 deletions

View File

@ -120,7 +120,7 @@ public final class ClassicTokenizer extends Tokenizer {
}
private void init(Version matchVersion) {
this.scanner = new ClassicTokenizerImpl(input);
this.scanner = new ClassicTokenizerImpl(null); // best-effort NPE if you don't call reset
}
// this tokenizer generates three attributes:

View File

@ -134,7 +134,7 @@ public final class StandardTokenizer extends Tokenizer {
}
private final void init(Version matchVersion) {
this.scanner = new StandardTokenizerImpl(input);
this.scanner = new StandardTokenizerImpl(null); // best-effort NPE if you don't call reset
}
// this tokenizer generates three attributes:

View File

@ -98,7 +98,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
*/
public UAX29URLEmailTokenizer(Version matchVersion, Reader input) {
super(input);
this.scanner = getScannerFor(matchVersion, input);
this.scanner = getScannerFor(matchVersion);
}
/**
@ -106,7 +106,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
*/
public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source, input);
this.scanner = getScannerFor(matchVersion, input);
this.scanner = getScannerFor(matchVersion);
}
/**
@ -114,11 +114,11 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
*/
public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory, input);
this.scanner = getScannerFor(matchVersion, input);
this.scanner = getScannerFor(matchVersion);
}
private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
return new UAX29URLEmailTokenizerImpl(input);
private static StandardTokenizerInterface getScannerFor(Version matchVersion) {
return new UAX29URLEmailTokenizerImpl(null); // best-effort NPE if you don't call reset
}
// this tokenizer generates three attributes:

View File

@ -143,7 +143,7 @@ public final class WikipediaTokenizer extends Tokenizer {
*/
public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(input);
this.scanner = new WikipediaTokenizerImpl(input);
this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset
init(tokenOutput, untokenizedTypes);
}
@ -156,7 +156,7 @@ public final class WikipediaTokenizer extends Tokenizer {
*/
public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(factory, input);
this.scanner = new WikipediaTokenizerImpl(input);
this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset
init(tokenOutput, untokenizedTypes);
}
@ -169,7 +169,7 @@ public final class WikipediaTokenizer extends Tokenizer {
*/
public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(source, input);
this.scanner = new WikipediaTokenizerImpl(input);
this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset
init(tokenOutput, untokenizedTypes);
}

View File

@ -52,9 +52,12 @@ public class TestElision extends BaseTokenStreamTestCase {
private List<String> filter(TokenFilter filter) throws IOException {
List<String> tas = new ArrayList<String>();
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
filter.reset();
while (filter.incrementToken()) {
tas.add(termAtt.toString());
}
filter.end();
filter.close();
return tas;
}

View File

@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
ts_1.reset();
ts_1.incrementToken();
assertEquals("first stream", "liście", termAtt_1.toString());
ts_1.end();
ts_1.close();
TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
ts_2.reset();
ts_2.incrementToken();
assertEquals("second stream", "dany", termAtt_2.toString());
ts_2.end();
ts_2.close();
}
/** Test stemming of mixed-case tokens. */
@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
public final void testPOSAttribute() throws IOException {
TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
ts.reset();
assertPOSToken(ts, "liście",
"subst:sg:acc:n2",
"subst:sg:nom:n2",
@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
assertPOSToken(ts, "lista",
"subst:sg:dat:f",
"subst:sg:loc:f");
ts.end();
ts.close();
}
/** blast some random strings through the analyzer */