mirror of https://github.com/apache/lucene.git
LUCENE-8273: Adjust position increments when filtering stacked tokens
This commit is contained in:
parent
34741a863a
commit
4ea9d2ea8c
|
@ -168,6 +168,16 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
|
|||
return false;
|
||||
}
|
||||
if (shouldFilter()) {
|
||||
// we're chopping the underlying Tokenstream up into fragments, and presenting
|
||||
// only those parts of it that pass the filter to the delegate, so the delegate is
|
||||
// in effect seeing multiple tokenstream snippets. Tokenstreams can't have an initial
|
||||
// position increment of 0, so if the snippet starts on a stacked token we need to
|
||||
// offset it here and then correct the increment back again after delegation
|
||||
boolean adjustPosition = false;
|
||||
if (posIncAtt.getPositionIncrement() == 0) {
|
||||
posIncAtt.setPositionIncrement(1);
|
||||
adjustPosition = true;
|
||||
}
|
||||
lastTokenFiltered = true;
|
||||
state = TokenState.PREBUFFERING;
|
||||
// we determine that the delegate has emitted all the tokens it can at the current
|
||||
|
@ -178,6 +188,10 @@ public abstract class ConditionalTokenFilter extends TokenFilter {
|
|||
boolean more = delegate.incrementToken();
|
||||
if (more) {
|
||||
state = TokenState.DELEGATING;
|
||||
if (adjustPosition) {
|
||||
int posInc = posIncAtt.getPositionIncrement();
|
||||
posIncAtt.setPositionIncrement(posInc - 1);
|
||||
}
|
||||
}
|
||||
else {
|
||||
lastTokenFiltered = false;
|
||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
|
|||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -330,6 +331,44 @@ public class TestConditionalTokenFilter extends BaseTokenStreamTestCase {
|
|||
assertTokenStreamContents(ts, new String[]{"jvboq"});
|
||||
}
|
||||
|
||||
public void testInternalPositionAdjustment() throws IOException {
|
||||
// check that the partial TokenStream sent to the condition filter begins with a posInc of 1,
|
||||
// even if the input stream has a posInc of 0 at that position, and that the filtered stream
|
||||
// has the correct posInc afterwards
|
||||
TokenStream ts = whitespaceMockTokenizer("one two three");
|
||||
ts = new KeywordRepeatFilter(ts);
|
||||
ts = new NonRandomSkippingFilter(ts, PositionAssertingTokenFilter::new, false, true, true, true, true, false);
|
||||
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{ "one", "one", "two", "two", "three", "three" },
|
||||
new int[]{ 1, 0, 1, 0, 1, 0});
|
||||
}
|
||||
|
||||
private static final class PositionAssertingTokenFilter extends TokenFilter {
|
||||
|
||||
boolean reset = false;
|
||||
final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
protected PositionAssertingTokenFilter(TokenStream input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
this.reset = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (reset) {
|
||||
assertEquals(1, posIncAtt.getPositionIncrement());
|
||||
}
|
||||
reset = false;
|
||||
return input.incrementToken();
|
||||
}
|
||||
}
|
||||
|
||||
private static class RandomSkippingFilter extends ConditionalTokenFilter {
|
||||
|
||||
Random random;
|
||||
|
|
Loading…
Reference in New Issue