mirror of https://github.com/apache/lucene.git
LUCENE-8376, LUCENE-8371: ConditionalTokenFilter fixes
This commit is contained in:
parent
ea4043b954
commit
f835d24997
|
@ -137,6 +137,10 @@ Bug Fixes:
|
|||
TestLucene{54,70}DocValuesFormat.testSortedSetVariableLengthBigVsStoredFields()
|
||||
failures (Erick Erickson)
|
||||
|
||||
* LUCENE-8376, LUCENE-8371: ConditionalTokenFilter.end() would not propagate correctly
|
||||
if the last token in the stream was subsequently dropped; FixedShingleFilter did
|
||||
not set position increment in end() (Alan Woodward)
|
||||
|
||||
Changes in Runtime Behavior:
|
||||
|
||||
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing
|
||||
|
|
|
@ -159,6 +159,7 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
|
|||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
lastTokenFiltered = false;
|
||||
while (true) {
|
||||
if (state == TokenState.READING) {
|
||||
if (bufferedState != null) {
|
||||
|
@ -192,16 +193,15 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
|
|||
adjustPosition = false;
|
||||
}
|
||||
else {
|
||||
lastTokenFiltered = false;
|
||||
state = TokenState.READING;
|
||||
return endDelegating();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
lastTokenFiltered = false;
|
||||
return true;
|
||||
}
|
||||
if (state == TokenState.DELEGATING) {
|
||||
lastTokenFiltered = true;
|
||||
if (delegate.incrementToken()) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -139,6 +139,7 @@ public final class FixedShingleFilter extends TokenFilter {
|
|||
}
|
||||
clearAttributes();
|
||||
this.offsetAtt.setOffset(0, endToken.endOffset());
|
||||
this.incAtt.setPositionIncrement(endToken.posInc());
|
||||
}
|
||||
|
||||
private void finishInnerStream() throws IOException {
|
||||
|
|
|
@ -132,6 +132,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
// FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
|
||||
// can break position lengths
|
||||
avoidConditionals.add(FlattenGraphFilter.class);
|
||||
// LimitToken*Filters don't set end offsets correctly
|
||||
avoidConditionals.add(LimitTokenOffsetFilter.class);
|
||||
avoidConditionals.add(LimitTokenCountFilter.class);
|
||||
avoidConditionals.add(LimitTokenPositionFilter.class);
|
||||
}
|
||||
|
||||
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
|
||||
|
|
|
@ -173,17 +173,6 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testEndPropagation() throws IOException {
|
||||
CannedTokenStream cts1 = new CannedTokenStream(0, 20,
|
||||
new Token("alice", 0, 5), new Token("bob", 6, 8)
|
||||
);
|
||||
TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
|
||||
@Override
|
||||
protected boolean shouldFilter() throws IOException {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
|
||||
null, null, null, null, null, 20);
|
||||
|
||||
CannedTokenStream cts2 = new CannedTokenStream(0, 20,
|
||||
new Token("alice", 0, 5), new Token("bob", 6, 8)
|
||||
|
@ -196,6 +185,19 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
|
|||
};
|
||||
assertTokenStreamContents(ts2, new String[]{ "alice", "bob" },
|
||||
null, null, null, null, null, 18);
|
||||
|
||||
CannedTokenStream cts1 = new CannedTokenStream(0, 20,
|
||||
new Token("alice", 0, 5), new Token("bob", 6, 8)
|
||||
);
|
||||
TokenStream ts1 = new ConditionalTokenFilter(cts1, EndTrimmingFilter::new) {
|
||||
@Override
|
||||
protected boolean shouldFilter() throws IOException {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
assertTokenStreamContents(ts1, new String[]{ "alice", "bob" },
|
||||
null, null, null, null, null, 20);
|
||||
|
||||
}
|
||||
|
||||
public void testWrapGraphs() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue