LUCENE-8273: TestRandomChains found some more end() handling problems

Alan Woodward 2018-05-21 15:12:32 +01:00
parent 63e213916c
commit 0c0fce3e98
2 changed files with 74 additions and 19 deletions
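For context, the problems fixed here are in how end() is propagated through wrapped token streams. The consumer-side contract that TestRandomChains exercises is sketched below (illustrative only, not part of this commit; WhitespaceAnalyzer and the field/text values are arbitrary placeholders):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class EndContractDemo {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new WhitespaceAnalyzer();
         TokenStream ts = analyzer.tokenStream("field", "some example text")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsets = ts.addAttribute(OffsetAttribute.class);
      ts.reset();                               // 1. reset before consuming
      while (ts.incrementToken()) {             // 2. pull tokens
        System.out.println(term + " [" + offsets.startOffset() + "," + offsets.endOffset() + "]");
      }
      ts.end();                                 // 3. end() sets the final offset; wrapping filters
                                                //    must propagate it consistently
      System.out.println("final offset: " + offsets.endOffset());
    }
  }
}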

ConditionalTokenFilter.java

@@ -80,10 +80,10 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
     public void end() throws IOException {
       // imitate Tokenizer.end() call - endAttributes, set final offset
       if (exhausted) {
-        if (endCalled == false) {
+        if (endState == null) {
           input.end();
+          endState = captureState();
         }
-        endCalled = true;
         endOffset = offsetAtt.endOffset();
       }
       endAttributes();
@@ -96,7 +96,7 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
   private boolean lastTokenFiltered;
   private State bufferedState = null;
   private boolean exhausted;
-  private boolean endCalled;
+  private State endState = null;
   private int endOffset;

   private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
@@ -125,18 +125,22 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
     this.bufferedState = null;
     this.exhausted = false;
     this.endOffset = -1;
-    this.endCalled = false;
+    this.endState = null;
   }

   @Override
   public void end() throws IOException {
-    if (endCalled == false) {
+    if (endState == null) {
       super.end();
-      endCalled = true;
+      endState = captureState();
+    }
+    else {
+      restoreState(endState);
     }
     endOffset = getAttribute(OffsetAttribute.class).endOffset();
     if (lastTokenFiltered) {
       this.delegate.end();
+      endState = captureState();
     }
   }
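The change above swaps the boolean endCalled flag for a captured attribute State: the first end() call runs the wrapped stream's end() and captures the resulting attributes, and any later end() call replays that state instead of calling end() again. A minimal sketch of the same idiom on a plain TokenFilter, using only the standard AttributeSource captureState()/restoreState() methods (RememberEndFilter is a made-up name, not the committed code):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeSource;

final class RememberEndFilter extends TokenFilter {

  private AttributeSource.State endState;   // captured on the first end() call

  RememberEndFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    return input.incrementToken();          // pass-through; only end() handling is of interest here
  }

  @Override
  public void end() throws IOException {
    if (endState == null) {
      super.end();                          // let the wrapped stream set its end attributes once
      endState = captureState();            // remember the resulting attribute values
    } else {
      restoreState(endState);               // replay them if end() is called again
    }
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    endState = null;                        // the stream may be reused after reset()
  }
}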

TestConditionalTokenFilter.java

@@ -37,7 +37,10 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ValidatingTokenFilter;
 import org.apache.lucene.analysis.core.TypeTokenFilter;
+import org.apache.lucene.analysis.de.GermanStemFilter;
+import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.standard.ClassicTokenizer;
 import org.apache.lucene.analysis.synonym.SolrSynonymParser;
@@ -308,8 +311,36 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
       protected TokenStreamComponents createComponents(String fieldName) {
         Tokenizer source = new NGramTokenizer();
         TokenStream sink = new KeywordRepeatFilter(source);
-        sink = new ConditionalTokenFilter(sink, in -> new TypeTokenFilter(in, Collections.singleton("word"))) {
-          Random random = new Random(seed);
+        sink = new RandomSkippingFilter(sink, seed, in -> new TypeTokenFilter(in, Collections.singleton("word")));
+        sink = new ValidatingTokenFilter(sink, "last stage");
+        return new TokenStreamComponents(source, sink);
+      }
+    };
+    checkRandomData(random(), analyzer, 1);
+  }
+
+  public void testEndWithShingles() throws IOException {
+    TokenStream ts = whitespaceMockTokenizer("cyk jvboq \u092e\u0962\u093f");
+    ts = new GermanStemFilter(ts);
+    ts = new NonRandomSkippingFilter(ts, in -> new FixedShingleFilter(in, 2), true, false, true);
+    ts = new NonRandomSkippingFilter(ts, IndicNormalizationFilter::new, true);
+    assertTokenStreamContents(ts, new String[]{"jvboq"});
+  }
+
+  private static class RandomSkippingFilter extends ConditionalTokenFilter {
+
+    Random random;
+    final long seed;
+
+    protected RandomSkippingFilter(TokenStream input, long seed, Function<TokenStream, TokenStream> inputFactory) {
+      super(input, inputFactory);
+      this.seed = seed;
+      this.random = new Random(seed);
+    }

     @Override
     protected boolean shouldFilter() throws IOException {
       return random.nextBoolean();
@@ -320,14 +351,34 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
       super.reset();
       random = new Random(seed);
     }
-        };
-        sink = new ValidatingTokenFilter(sink, "last stage");
-        return new TokenStreamComponents(source, sink);
-      }
-    };
-    checkRandomData(random(), analyzer, 1);
   }
+
+  private static class NonRandomSkippingFilter extends ConditionalTokenFilter {
+
+    final boolean[] shouldFilters;
+    int pos;
+
+    /**
+     * Create a new BypassingTokenFilter
+     *
+     * @param input        the input TokenStream
+     * @param inputFactory a factory function to create a new instance of the TokenFilter to wrap
+     */
+    protected NonRandomSkippingFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory, boolean... shouldFilters) {
+      super(input, inputFactory);
+      this.shouldFilters = shouldFilters;
+    }
+
+    @Override
+    protected boolean shouldFilter() throws IOException {
+      return shouldFilters[pos++ % shouldFilters.length];
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      pos = 0;
+    }
+  }
 }
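For reference, the shouldFilter() contract the new test helpers implement: a ConditionalTokenFilter subclass inspects the current token's attributes and decides, per token, whether the wrapped chain should process it. A minimal sketch under that contract (illustrative only; LongTermsOnlyFilter is a made-up name, and the ConditionalTokenFilter package is assumed to be org.apache.lucene.analysis.miscellaneous as in the 7.4 analysis-common module):

import java.io.IOException;
import java.util.function.Function;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter; // package assumed
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Illustrative subclass (not part of this commit): only terms longer than four
// characters are routed through the wrapped filter chain; shorter terms bypass it.
final class LongTermsOnlyFilter extends ConditionalTokenFilter {

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  LongTermsOnlyFilter(TokenStream input, Function<TokenStream, TokenStream> inner) {
    super(input, inner);
  }

  @Override
  protected boolean shouldFilter() throws IOException {
    // decides per token whether the wrapped chain sees it
    return termAtt.length() > 4;
  }
}

Wired up like the helpers in the test above, for example sink = new LongTermsOnlyFilter(sink, in -> new PorterStemFilter(in)), only terms longer than four characters would reach the stemmer.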