mirror of https://github.com/apache/lucene.git
LUCENE-8650: Fix end() and reset() in ConcatenatingTokenStream
parent f543b4e1f4
commit 7713a4f245
CHANGES.txt
@@ -286,6 +286,10 @@ Bug fixes:
 * LUCENE-8654: Polygon2D#relateTriangle returns the wrong answer if polygon is inside
   the triangle. (Ignacio Vera)
 
+* LUCENE-8650: ConcatenatingTokenStream did not correctly clear its state in reset(), and
+  was not propagating final position increments from its child streams correctly.
+  (Dan Meehl, Alan Woodward)
+
 New Features
 
 * LUCENE-8026: ExitableDirectoryReader may now time out queries that run on
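For context, a minimal usage sketch (not part of this commit; the tokenizer and the input strings are illustrative). ConcatenatingTokenStream splices several child streams into a single stream, which is why stale state in reset() and missing final position increments in end() were visible to consumers:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.ConcatenatingTokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class ConcatenationSketch {
      public static void main(String[] args) throws Exception {
        // Illustrative children; any TokenStreams work.
        WhitespaceTokenizer first = new WhitespaceTokenizer();
        first.setReader(new StringReader("first words"));
        WhitespaceTokenizer second = new WhitespaceTokenizer();
        second.setReader(new StringReader("second words"));

        try (TokenStream ts = new ConcatenatingTokenStream(first, second)) {
          CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
          ts.reset();
          while (ts.incrementToken()) {
            System.out.println(term);  // first, words, second, words
          }
          ts.end();  // final offset/position increment are defined here (fixed by this commit)
        }
      }
    }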
ConcatenatingTokenStream.java
@@ -22,6 +22,7 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.IOUtils;
@@ -39,10 +40,13 @@ public final class ConcatenatingTokenStream extends TokenStream {
 
   private final TokenStream[] sources;
   private final OffsetAttribute[] sourceOffsets;
+  private final PositionIncrementAttribute[] sourceIncrements;
   private final OffsetAttribute offsetAtt;
+  private final PositionIncrementAttribute posIncAtt;
 
   private int currentSource;
   private int offsetIncrement;
+  private int initialPositionIncrement = 1;
 
   /**
    * Create a new ConcatenatingTokenStream from a set of inputs
@@ -52,9 +56,12 @@ public final class ConcatenatingTokenStream extends TokenStream {
     super(combineSources(sources));
     this.sources = sources;
     this.offsetAtt = addAttribute(OffsetAttribute.class);
+    this.posIncAtt = addAttribute(PositionIncrementAttribute.class);
     this.sourceOffsets = new OffsetAttribute[sources.length];
+    this.sourceIncrements = new PositionIncrementAttribute[sources.length];
     for (int i = 0; i < sources.length; i++) {
       this.sourceOffsets[i] = sources[i].addAttribute(OffsetAttribute.class);
+      this.sourceIncrements[i] = sources[i].addAttribute(PositionIncrementAttribute.class);
     }
   }
 
@@ -78,19 +85,26 @@ public final class ConcatenatingTokenStream extends TokenStream {
 
   @Override
   public boolean incrementToken() throws IOException {
+    boolean newSource = false;
     while (sources[currentSource].incrementToken() == false) {
       if (currentSource >= sources.length - 1)
         return false;
       sources[currentSource].end();
+      initialPositionIncrement = sourceIncrements[currentSource].getPositionIncrement();
       OffsetAttribute att = sourceOffsets[currentSource];
       if (att != null)
         offsetIncrement += att.endOffset();
       currentSource++;
+      newSource = true;
     }
 
     clearAttributes();
     sources[currentSource].copyTo(this);
     offsetAtt.setOffset(offsetAtt.startOffset() + offsetIncrement, offsetAtt.endOffset() + offsetIncrement);
+    if (newSource) {
+      int posInc = posIncAtt.getPositionIncrement();
+      posIncAtt.setPositionIncrement(posInc + initialPositionIncrement);
+    }
 
     return true;
   }
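The key behavioural change above: when a child stream is exhausted, its end() publishes a trailing position increment (for example, left by trailing stopwords that were removed), and that increment is now carried over onto the first token of the next child. A hedged sketch, reusing the imports above plus CannedTokenStream, Token, and PositionIncrementAttribute (values mirror the new testOffsetGaps below):

    // cts1 ends with a position-increment gap of 2 and a final offset of 10.
    CannedTokenStream cts1 = new CannedTokenStream(2, 10, new Token("a", 0, 1));
    CannedTokenStream cts2 = new CannedTokenStream(new Token("b", 0, 1));
    try (TokenStream ts = new ConcatenatingTokenStream(cts1, cts2)) {
      PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(posInc.getPositionIncrement());  // prints 1, then 3 (1 + gap of 2)
      }
      ts.end();
    }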
@@ -98,7 +112,11 @@ public final class ConcatenatingTokenStream extends TokenStream {
   @Override
   public void end() throws IOException {
     sources[currentSource].end();
+    int finalOffset = sourceOffsets[currentSource].endOffset() + offsetIncrement;
+    int finalPosInc = sourceIncrements[currentSource].getPositionIncrement();
     super.end();
+    offsetAtt.setOffset(finalOffset, finalOffset);
+    posIncAtt.setPositionIncrement(finalPosInc);
   }
 
   @Override
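Why the values are captured before super.end(): the default TokenStream.end() resets the attribute state (and sets the position increment to 0), so the last child's final offset and trailing increment must be read first and re-applied afterwards. For a consumer, the standard workflow now yields correct totals (a sketch of the existing TokenStream contract, not new API):

    ts.reset();
    while (ts.incrementToken()) { /* consume tokens */ }
    ts.end();   // offsetAtt.endOffset() is now cumulative across all sources,
                // posIncAtt holds the last source's trailing increment
    ts.close();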
@@ -107,6 +125,8 @@ public final class ConcatenatingTokenStream extends TokenStream {
       source.reset();
     }
     super.reset();
+    currentSource = 0;
+    offsetIncrement = 0;
   }
 
   @Override
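Without re-zeroing currentSource and offsetIncrement, a reused stream would resume at the already-exhausted last source with the accumulated offset shift still applied. A hedged reuse sketch, continuing the earlier example (Tokenizer children must be re-filled before the stream is reset):

    first.setReader(new StringReader("more words"));
    second.setReader(new StringReader("and more"));
    ts.reset();                        // now starts again from source 0
    while (ts.incrementToken()) { }    // offsets start from 0 again
    ts.end();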
TestConcatenatingTokenStream.java
@@ -21,7 +21,9 @@ import java.io.IOException;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -46,6 +48,33 @@ public class TestConcatenatingTokenStream extends BaseTokenStreamTestCase {
         new int[]{ 0, 6, 12, 19, 25, 31 },
         new int[]{ 5, 11, 18, 24, 30, 36 });
 
+    // test re-use
+    first.setReader(new StringReader("first words "));
+    second.setReader(new StringReader("second words"));
+    third.setReader(new StringReader(" third words"));
+    assertTokenStreamContents(ts,
+        new String[] { "first", "words", "second", "words", "third", "words" },
+        new int[]{ 0, 6, 12, 19, 25, 31 },
+        new int[]{ 5, 11, 18, 24, 30, 36 },
+        new int[]{ 1, 1, 1, 1, 1, 1 });
+
+  }
+
+  public void testOffsetGaps() throws IOException {
+    CannedTokenStream cts1 = new CannedTokenStream(2, 10,
+        new Token("a", 0, 1), new Token("b", 2, 3));
+    CannedTokenStream cts2 = new CannedTokenStream(2, 10,
+        new Token("c", 0, 1), new Token("d", 2, 3));
+
+    TokenStream ts = new ConcatenatingTokenStream(cts1, cts2);
+    assertTokenStreamContents(ts,
+        new String[] { "a", "b", "c", "d" },
+        new int[]{ 0, 2, 10, 12 },
+        new int[]{ 1, 3, 11, 13 },
+        null,
+        new int[]{ 1, 1, 3, 1 },
+        null, 20, 2, null, false, null
+    );
   }
 
   public void testInconsistentAttributes() throws IOException {
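Reading the new testOffsetGaps expectations: each CannedTokenStream declares a final offset of 10 and a trailing position increment of 2, so cts2's offsets are shifted by 10 (starts 10, 12 and ends 11, 13), token "c" carries increment 1 + 2 = 3 (its own increment plus the gap carried over from cts1), and the concatenated stream ends with a final offset of 20 and a trailing increment of 2 (the trailing 20, 2 arguments to assertTokenStreamContents).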