Fix IndexOutOfBoundsException thrown in DefaultPassageFormatter by unordered matches (#13315)

This commit is contained in:
Stéphane Campinas 2024-06-20 16:36:51 +02:00 committed by GitHub
parent 057cbf3c86
commit d453832bb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 61 additions and 2 deletions

View File

@ -179,6 +179,9 @@ Bug Fixes
* GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those
of DoubleValues#doubleValue(). (Uwe Schindler)
* GITHUB#12431: Fix IndexOutOfBoundsException thrown in DefaultPassageFormatter
by unordered matches. (Stephane Campinas)
Changes in Runtime Behavior
---------------------

View File

@ -194,6 +194,15 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
new PriorityQueue<MatchesIterator>(matches.size()) {
@Override
protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
if (a.startPosition() == -1 && b.startPosition() == -1) {
try {
return a.startOffset() < b.startOffset()
|| (a.startOffset() == b.startOffset() && a.endOffset() < b.endOffset())
|| (a.startOffset() == b.startOffset() && a.endOffset() == b.endOffset());
} catch (IOException e) {
throw new IllegalArgumentException("Failed to retrieve term offset", e);
}
}
return a.startPosition() < b.startPosition()
|| (a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition())
|| (a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition());

View File

@ -45,14 +45,14 @@ public interface MatchesIterator {
boolean next() throws IOException;
/**
* The start position of the current match
* The start position of the current match, or {@code -1} if positions are not available
*
* <p>Should only be called after {@link #next()} has returned {@code true}
*/
int startPosition();
/**
* The end position of the current match
* The end position of the current match, or {@code -1} if positions are not available
*
* <p>Should only be called after {@link #next()} has returned {@code true}
*/

View File

@ -27,6 +27,7 @@ import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
@ -57,6 +58,52 @@ public class TestUnifiedHighlighterTermVec extends UnifiedHighlighterTestBase {
super(randomFieldType(random()));
}
/**
 * Single-character terms where the query clauses ("x", then "y") are listed in the opposite
 * order to their occurrence in the indexed text "y x".
 */
public void testTermVecButNoPositions1() throws Exception {
  final String firstClauseTerm = "x";
  final String secondClauseTerm = "y";
  final String indexedText = "y x";
  final String expectedFragment = "<b>y</b> <b>x</b>";
  testTermVecButNoPositions(firstClauseTerm, secondClauseTerm, indexedText, expectedFragment);
}
/**
 * Single-character terms where the query clauses ("y", then "x") are listed in the same order
 * as their occurrence in the indexed text "y x".
 */
public void testTermVecButNoPositions2() throws Exception {
  final String firstClauseTerm = "y";
  final String secondClauseTerm = "x";
  final String indexedText = "y x";
  final String expectedFragment = "<b>y</b> <b>x</b>";
  testTermVecButNoPositions(firstClauseTerm, secondClauseTerm, indexedText, expectedFragment);
}
/**
 * Three-character terms where the query clauses ("zzz", then "yyy") are listed in the same order
 * as their occurrence in the indexed text "zzz yyy".
 */
public void testTermVecButNoPositions3() throws Exception {
  final String firstClauseTerm = "zzz";
  final String secondClauseTerm = "yyy";
  final String indexedText = "zzz yyy";
  final String expectedFragment = "<b>zzz</b> <b>yyy</b>";
  testTermVecButNoPositions(firstClauseTerm, secondClauseTerm, indexedText, expectedFragment);
}
/**
 * Three-character terms where the query clauses ("zzz", then "yyy") are listed in the opposite
 * order to their occurrence in the indexed text "yyy zzz".
 */
public void testTermVecButNoPositions4() throws Exception {
  final String firstClauseTerm = "zzz";
  final String secondClauseTerm = "yyy";
  final String indexedText = "yyy zzz";
  final String expectedFragment = "<b>yyy</b> <b>zzz</b>";
  testTermVecButNoPositions(firstClauseTerm, secondClauseTerm, indexedText, expectedFragment);
}
/**
 * Shared driver for the {@code testTermVecButNoPositionsN} cases.
 *
 * <p>Indexes a single document whose "body" field uses a copy of {@link TextField#TYPE_STORED}
 * with term vectors and term vector offsets switched on (term vector positions are never
 * enabled here), runs a conjunction of two term queries against it, and checks that the single
 * highlighted snippet contains the expected markup.
 *
 * @param aaa term of the first MUST clause
 * @param bbb term of the second MUST clause
 * @param indexed text stored in the "body" field
 * @param expected fragment that the highlighted snippet must contain
 */
public void testTermVecButNoPositions(String aaa, String bbb, String indexed, String expected)
    throws Exception {
  final FieldType offsetsOnlyType = new FieldType(TextField.TYPE_STORED);
  offsetsOnlyType.setStoreTermVectors(true);
  offsetsOnlyType.setStoreTermVectorOffsets(true);
  offsetsOnlyType.freeze();

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document doc = new Document();
  doc.add(new Field("body", indexed, offsetsOnlyType));
  writer.addDocument(doc);

  try (IndexReader reader = writer.getReader()) {
    // The writer is closed as soon as the reader has been obtained, before searching.
    writer.close();
    IndexSearcher searcher = newSearcher(reader);

    BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
    conjunction.add(new TermQuery(new Term("body", aaa)), BooleanClause.Occur.MUST);
    conjunction.add(new TermQuery(new Term("body", bbb)), BooleanClause.Occur.MUST);
    BooleanQuery query = conjunction.build();

    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.totalHits.value);

    String[] snippets =
        UnifiedHighlighter.builder(searcher, indexAnalyzer)
            .build()
            .highlight("body", query, hits, 2);
    assertEquals(1, snippets.length);

    // The snippet itself doubles as the failure message so a mismatch shows the actual output.
    String snippet = snippets[0];
    assertTrue(snippet, snippet.contains(expected));
  }
}
public void testFetchTermVecsOncePerDoc() throws IOException {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);