mirror of https://github.com/apache/lucene.git
LUCENE-8697: GraphTokenStreamFiniteStrings correctly handles side paths with gaps
This commit is contained in:
parent
97875af3f9
commit
55b4d2dcaa
|
@ -223,6 +223,9 @@ New Features
|
||||||
* LUCENE-8655: Add a getter in FunctionScoreQuery class in order to access the
|
* LUCENE-8655: Add a getter in FunctionScoreQuery class in order to access the
|
||||||
underlying DoubleValuesSource. (Gérald Quaire via Alan Woodward)
|
underlying DoubleValuesSource. (Gérald Quaire via Alan Woodward)
|
||||||
|
|
||||||
|
* LUCENE-8697: GraphTokenStreamFiniteStrings correctly handles side paths
|
||||||
|
containing gaps (Alan Woodward)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.
|
* LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.
|
||||||
|
|
|
@ -211,26 +211,33 @@ public final class GraphTokenStreamFiniteStrings {
|
||||||
int pos = -1;
|
int pos = -1;
|
||||||
int prevIncr = 1;
|
int prevIncr = 1;
|
||||||
int state = -1;
|
int state = -1;
|
||||||
|
int gap = 0;
|
||||||
while (in.incrementToken()) {
|
while (in.incrementToken()) {
|
||||||
int currentIncr = posIncAtt.getPositionIncrement();
|
int currentIncr = posIncAtt.getPositionIncrement();
|
||||||
if (pos == -1 && currentIncr < 1) {
|
if (pos == -1 && currentIncr < 1) {
|
||||||
throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
|
throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
|
||||||
}
|
}
|
||||||
|
|
||||||
// always use inc 1 while building, but save original increment
|
if (currentIncr == 0) {
|
||||||
int incr = Math.min(1, currentIncr);
|
if (gap > 0) {
|
||||||
if (incr > 0) {
|
pos -= gap;
|
||||||
pos += incr;
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
pos++;
|
||||||
|
gap = currentIncr - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int endPos = pos + posLengthAtt.getPositionLength();
|
int endPos = pos + posLengthAtt.getPositionLength() + gap;
|
||||||
while (state < endPos) {
|
while (state < endPos) {
|
||||||
state = builder.createState();
|
state = builder.createState();
|
||||||
}
|
}
|
||||||
|
|
||||||
BytesRef term = termBytesAtt.getBytesRef();
|
BytesRef term = termBytesAtt.getBytesRef();
|
||||||
int id = getTermID(currentIncr, prevIncr, term);
|
int id = getTermID(currentIncr, prevIncr, term);
|
||||||
|
//System.out.println("Adding transition: " + term.utf8ToString() + "@" + pos + "->" + endPos);
|
||||||
builder.addTransition(pos, endPos, id);
|
builder.addTransition(pos, endPos, id);
|
||||||
|
pos += gap;
|
||||||
|
|
||||||
// only save last increment on non-zero increment in case we have multiple stacked tokens
|
// only save last increment on non-zero increment in case we have multiple stacked tokens
|
||||||
if (currentIncr > 0) {
|
if (currentIncr > 0) {
|
||||||
|
|
|
@ -539,14 +539,16 @@ public class TestGraphTokenStreamFiniteStrings extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMultipleSidePaths() throws Exception {
|
public void testMultipleSidePaths() throws Exception {
|
||||||
|
// 0 1 2 3 4 5 6 7 8
|
||||||
|
// the ny:4/new york wifi:5/wi fi:4 [] wifi:2/wi fi network
|
||||||
TokenStream ts = new CannedTokenStream(
|
TokenStream ts = new CannedTokenStream(
|
||||||
token("the", 1, 1),
|
token("the", 1, 1),
|
||||||
token("ny", 1, 4),
|
token("ny", 1, 4),
|
||||||
token("new", 0, 1),
|
token("new", 0, 1),
|
||||||
token("york", 1, 1),
|
token("york", 1, 1),
|
||||||
token("wifi", 1, 4),
|
token("wifi", 1, 5),
|
||||||
token("wi", 0, 1),
|
token("wi", 0, 1),
|
||||||
token("fi", 1, 3),
|
token("fi", 1, 4),
|
||||||
token("wifi", 2, 2),
|
token("wifi", 2, 2),
|
||||||
token("wi", 0, 1),
|
token("wi", 0, 1),
|
||||||
token("fi", 1, 1),
|
token("fi", 1, 1),
|
||||||
|
@ -596,4 +598,46 @@ public class TestGraphTokenStreamFiniteStrings extends LuceneTestCase {
|
||||||
terms = graph.getTerms("field", 7);
|
terms = graph.getTerms("field", 7);
|
||||||
assertArrayEquals(terms, new Term[] {new Term("field", "network")});
|
assertArrayEquals(terms, new Term[] {new Term("field", "network")});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSidePathWithGap() throws Exception {
|
||||||
|
// 0 1 2 3 4 5
|
||||||
|
// king alfred:4/alfred [] [] great/awesome ruled
|
||||||
|
CannedTokenStream cts = new CannedTokenStream(
|
||||||
|
token("king", 1, 1),
|
||||||
|
token("alfred", 1, 4),
|
||||||
|
token("alfred", 0, 1),
|
||||||
|
token("great", 3, 1),
|
||||||
|
token("awesome", 0, 1),
|
||||||
|
token("ruled", 1, 1)
|
||||||
|
);
|
||||||
|
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(cts);
|
||||||
|
Iterator<TokenStream> it = graph.getFiniteStrings();
|
||||||
|
assertTrue(it.hasNext());
|
||||||
|
assertTokenStream(it.next(), new String[]{ "king", "alfred", "ruled" }, new int[]{ 1, 1, 1 });
|
||||||
|
assertTrue(it.hasNext());
|
||||||
|
assertTokenStream(it.next(), new String[]{ "king", "alfred", "great", "ruled"}, new int[]{ 1, 1, 3, 1 });
|
||||||
|
assertTrue(it.hasNext());
|
||||||
|
assertTokenStream(it.next(), new String[]{ "king", "alfred", "awesome", "ruled"}, new int[]{ 1, 1, 3, 1 });
|
||||||
|
assertFalse(it.hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultipleSidePathsWithGaps() throws Exception {
|
||||||
|
// king alfred:4/alfred [] [] saxons:3 [] wessex ruled
|
||||||
|
CannedTokenStream cts = new CannedTokenStream(
|
||||||
|
token("king", 1, 1),
|
||||||
|
token("alfred", 1, 4),
|
||||||
|
token("alfred", 0, 1),
|
||||||
|
token("saxons", 3, 3),
|
||||||
|
token("wessex", 2, 1),
|
||||||
|
token("ruled", 1, 1)
|
||||||
|
);
|
||||||
|
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(cts);
|
||||||
|
Iterator<TokenStream> it = graph.getFiniteStrings();
|
||||||
|
assertTrue(it.hasNext());
|
||||||
|
assertTokenStream(it.next(), new String[]{ "king", "alfred", "wessex", "ruled" }, new int[]{ 1, 1, 2, 1 });
|
||||||
|
assertTrue(it.hasNext());
|
||||||
|
assertTokenStream(it.next(), new String[]{ "king", "alfred", "saxons", "ruled" }, new int[]{ 1, 1, 3, 1 });
|
||||||
|
assertFalse(it.hasNext());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue