LUCENE-8376, LUCENE-8371: ConditionalTokenFilter fixes

Alan Woodward 2018-07-02 15:36:12 +01:00
parent ea4043b954
commit f835d24997
5 changed files with 24 additions and 13 deletions

@@ -137,6 +137,10 @@ Bug Fixes:
TestLucene{54,70}DocValuesFormat.testSortedSetVariableLengthBigVsStoredFields()
failures (Erick Erickson)
* LUCENE-8376, LUCENE-8371: ConditionalTokenFilter.end() would not propagate correctly
if the last token in the stream was subsequently dropped; FixedShingleFilter did
not set position increment in end() (Alan Woodward)
Changes in Runtime Behavior:
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing
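To make the LUCENE-8376 entry above concrete, here is a minimal sketch of the scenario it describes; this is illustrative only and not part of this commit. A ConditionalTokenFilter routes tokens through a wrapped StopFilter, the trailing token is a stop word and gets dropped, and end() must still expose the delegate's end-of-stream state. WhitespaceTokenizer and StopFilter are standard Lucene classes; their wiring here is an assumption for illustration.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class ConditionalEndPropagationSketch {
  public static void main(String[] args) throws IOException {
    Tokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("alice and"));

    // Route every token through a StopFilter; the trailing "and" is dropped,
    // so the last token seen by the consumer was produced while delegating.
    TokenStream ts = new ConditionalTokenFilter(source,
        in -> new StopFilter(in, StopFilter.makeStopSet("and"))) {
      @Override
      protected boolean shouldFilter() {
        return true;
      }
    };

    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

    ts.reset();
    while (ts.incrementToken()) {
      // only "alice" is emitted
    }
    ts.end();
    // With the fix, the end state reflects the wrapped StopFilter's end(): the final
    // offset and the trailing position increment account for the dropped last token.
    System.out.println("final offset = " + offsetAtt.endOffset()
        + ", trailing posInc = " + posIncAtt.getPositionIncrement());
    ts.close();
  }
}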

@@ -159,6 +159,7 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
@Override
public final boolean incrementToken() throws IOException {
lastTokenFiltered = false;
while (true) {
if (state == TokenState.READING) {
if (bufferedState != null) {
@@ -192,16 +193,15 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
adjustPosition = false;
}
else {
lastTokenFiltered = false;
state = TokenState.READING;
return endDelegating();
}
return true;
}
lastTokenFiltered = false;
return true;
}
if (state == TokenState.DELEGATING) {
lastTokenFiltered = true;
if (delegate.incrementToken()) {
return true;
}
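The lastTokenFiltered flag maintained above is what lets end-of-stream state flow from the wrapped filter once the stream is exhausted. A simplified, assumed sketch of that pattern follows; it is not the literal ConditionalTokenFilter.end() body from this commit.

// Simplified, assumed sketch: how a lastTokenFiltered-style flag is typically
// consulted when end() is called on the outer filter.
@Override
public void end() throws IOException {
  super.end();          // default end state from the unfiltered input
  if (lastTokenFiltered) {
    delegate.end();     // let the wrapped filter overwrite final offset / position increment
  }
}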

@@ -139,6 +139,7 @@ public final class FixedShingleFilter extends TokenFilter {
}
clearAttributes();
this.offsetAtt.setOffset(0, endToken.endOffset());
this.incAtt.setPositionIncrement(endToken.posInc());
}
private void finishInnerStream() throws IOException {
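For context on why the added setPositionIncrement call matters: after end(), consumers such as the indexing chain read the position increment and offset attributes to account for positions and offsets past the last emitted token. A sketch of such a consumer is below; it is an assumption for illustration, not code from this commit.

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Sketch of a consumer reading end-of-stream state; this is why a filter's end()
// must set the trailing position increment and final offset correctly.
static void consume(TokenStream ts) throws IOException {
  PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    // consume / index each token
  }
  ts.end();
  int trailingPositions = posIncAtt.getPositionIncrement(); // positions after the last emitted token
  int finalOffset = offsetAtt.endOffset();                  // end offset of the original text
  ts.close();
}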

@@ -132,6 +132,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
// FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
// can break position lengths
avoidConditionals.add(FlattenGraphFilter.class);
// LimitToken*Filters don't set end offsets correctly
avoidConditionals.add(LimitTokenOffsetFilter.class);
avoidConditionals.add(LimitTokenCountFilter.class);
avoidConditionals.add(LimitTokenPositionFilter.class);
}
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();

@@ -173,17 +173,6 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
}
public void testEndPropagation() throws IOException {
CannedTokenStream cts1 = new CannedTokenStream(0, 20,
new Token("alice", 0, 5), new Token("bob", 6, 8)
);
TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
@Override
protected boolean shouldFilter() throws IOException {
return false;
}
};
assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
null, null, null, null, null, 20);
CannedTokenStream cts2 = new CannedTokenStream(0, 20,
new Token("alice", 0, 5), new Token("bob", 6, 8)
@@ -196,6 +185,19 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
};
assertTokenStreamContents(ts2, new String[]{ "alice", "bob" },
null, null, null, null, null, 18);
CannedTokenStream cts1 = new CannedTokenStream(0, 20,
new Token("alice", 0, 5), new Token("bob", 6, 8)
);
TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
@Override
protected boolean shouldFilter() throws IOException {
return false;
}
};
assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
null, null, null, null, null, 20);
}
public void testWrapGraphs() throws Exception {