mirror of https://github.com/apache/lucene.git
throw a best-effort NPE from the jflex-based tokenizers if you don't consume the TS correctly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401449 13f79535-47bb-0310-9956-ffa450edef68
parent 9caa4a68bd
commit e670e32471
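For context, the TokenStream contract requires reset() before the first incrementToken(), then end() and close() when done. The trick in the hunks below is to hand each jflex scanner a null Reader at construction time and only wire in the real input during reset(); a consumer that skips reset() now hits the null reader and gets an immediate NullPointerException instead of silently producing no tokens ("best effort" because it is a plain NPE rather than a dedicated exception). A minimal sketch of the expected consumption loop, mirroring the test changes below (the analyzer, field name, and input text are placeholders):

    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("some text"));
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    ts.reset();                    // required first: reset() hands the real reader to the scanner
    while (ts.incrementToken()) {  // called before reset(), this now fails fast with an NPE
      System.out.println(termAtt.toString());
    }
    ts.end();
    ts.close();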
@@ -120,7 +120,7 @@ public final class ClassicTokenizer extends Tokenizer {
   }

   private void init(Version matchVersion) {
-    this.scanner = new ClassicTokenizerImpl(input);
+    this.scanner = new ClassicTokenizerImpl(null); // best effort NPE if you dont call reset
   }

   // this tokenizer generates three attributes:
@@ -134,7 +134,7 @@ public final class StandardTokenizer extends Tokenizer {
   }

   private final void init(Version matchVersion) {
-    this.scanner = new StandardTokenizerImpl(input);
+    this.scanner = new StandardTokenizerImpl(null); // best effort NPE if you dont call reset
   }

   // this tokenizer generates three attributes:
@@ -98,7 +98,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
    */
   public UAX29URLEmailTokenizer(Version matchVersion, Reader input) {
     super(input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }

   /**
@@ -106,7 +106,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
    */
   public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
     super(source, input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }

   /**
@@ -114,11 +114,11 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
    */
   public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
     super(factory, input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }

-  private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
-    return new UAX29URLEmailTokenizerImpl(input);
+  private static StandardTokenizerInterface getScannerFor(Version matchVersion) {
+    return new UAX29URLEmailTokenizerImpl(null); // best effort NPE if you dont call reset
   }

   // this tokenizer generates three attributes:
@@ -143,7 +143,7 @@ public final class WikipediaTokenizer extends Tokenizer {
    */
   public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
     init(tokenOutput, untokenizedTypes);
   }

@@ -156,7 +156,7 @@ public final class WikipediaTokenizer extends Tokenizer {
    */
   public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(factory, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
     init(tokenOutput, untokenizedTypes);
   }

@@ -169,7 +169,7 @@ public final class WikipediaTokenizer extends Tokenizer {
    */
   public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(source, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
     init(tokenOutput, untokenizedTypes);
   }

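The test fixes below follow directly: any test that consumed a stream without calling reset() first now trips over the scanner's null reader. A hypothetical misuse, for illustration (4.x-era constructor):

    Tokenizer tok = new ClassicTokenizer(Version.LUCENE_40, new StringReader("hello world"));
    tok.incrementToken(); // NPE here: the scanner still wraps null because reset() was never called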
@@ -52,9 +52,12 @@ public class TestElision extends BaseTokenStreamTestCase {
   private List<String> filter(TokenFilter filter) throws IOException {
     List<String> tas = new ArrayList<String>();
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+    filter.reset();
     while (filter.incrementToken()) {
       tas.add(termAtt.toString());
     }
+    filter.end();
+    filter.close();
     return tas;
   }

@@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
     ts_1.reset();
     ts_1.incrementToken();
     assertEquals("first stream", "liście", termAtt_1.toString());
+    ts_1.end();
+    ts_1.close();

     TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
     CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
     ts_2.reset();
     ts_2.incrementToken();
     assertEquals("second stream", "dany", termAtt_2.toString());
+    ts_2.end();
+    ts_2.close();
   }

   /** Test stemming of mixed-case tokens. */
@@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
   public final void testPOSAttribute() throws IOException {
     TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));

+    ts.reset();
     assertPOSToken(ts, "liście",
       "subst:sg:acc:n2",
       "subst:sg:nom:n2",
@@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
     assertPOSToken(ts, "lista",
       "subst:sg:dat:f",
       "subst:sg:loc:f");
+    ts.end();
+    ts.close();
   }

   /** blast some random strings through the analyzer */