LUCENE-10229: return -1 for unknown offsets in ExtendedIntervalsSource. Modify highlighting to work properly with or without offsets (depending on their availability). (#803)

Thanks @romseygeek
This commit is contained in:
Dawid Weiss 2022-04-11 11:52:31 +02:00 committed by GitHub
parent ba1062620c
commit 2c1f938139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 330 additions and 248 deletions

View File

@ -84,6 +84,9 @@ New Features
Improvements
---------------------
* LUCENE-10229: Return -1 for unknown offsets in ExtendedIntervalsSource. Modify highlighting to
work properly with or without offsets. (Dawid Weiss)
* LUCENE-10494: Implement method to bulk add all collection elements to a PriorityQueue.
(Bauyrzhan Sakhariyev)

View File

@ -23,6 +23,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.PrimitiveIterator;
import java.util.Set;
import java.util.TreeMap;
@ -43,6 +44,7 @@ import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.IOSupplier;
/**
* Utility class to compute a list of "match regions" for a given query, searcher and document(s)
@ -75,7 +77,7 @@ public class MatchRegionRetriever {
*/
@FunctionalInterface
public interface FieldValueProvider {
List<CharSequence> getValues(String field);
List<CharSequence> getValues(String field) throws IOException;
}
/**
@ -131,9 +133,7 @@ public class MatchRegionRetriever {
preloadFields = new HashSet<>();
offsetStrategies.forEach(
(field, strategy) -> {
if (strategy.requiresDocument()) {
preloadFields.add(field);
}
preloadFields.add(field);
});
// Only preload those field values that can be affected by the query and are required
@ -181,17 +181,12 @@ public class MatchRegionRetriever {
int contextRelativeDocId = docId - currentContext.docBase;
// Only preload fields we may potentially need.
FieldValueProvider documentSupplier;
if (preloadFields.isEmpty()) {
documentSupplier = null;
} else {
Document doc = currentContext.reader().document(contextRelativeDocId, preloadFields);
documentSupplier = new DocumentFieldValueProvider(doc);
}
FieldValueProvider docFieldsSupplier =
new DocumentFieldValueProvider(currentContext, contextRelativeDocId, preloadFields);
highlights.clear();
highlightDocument(
currentContext, contextRelativeDocId, documentSupplier, (field) -> true, highlights);
currentContext, contextRelativeDocId, docFieldsSupplier, (field) -> true, highlights);
consumer.accept(docId, currentContext.reader(), contextRelativeDocId, highlights);
}
}
@ -262,7 +257,7 @@ public class MatchRegionRetriever {
switch (fieldInfo.getIndexOptions()) {
case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
return new OffsetsFromMatchIterator(field);
return new OffsetsFromMatchIterator(field, new OffsetsFromPositions(field, analyzer));
case DOCS_AND_FREQS_AND_POSITIONS:
return new OffsetsFromPositions(field, analyzer);
@ -293,14 +288,20 @@ public class MatchRegionRetriever {
/** Implements {@link FieldValueProvider} by loading the {@link Document} lazily, on first access. */
private static final class DocumentFieldValueProvider implements FieldValueProvider {
private final Document doc;
private final IOSupplier<Document> docSupplier;
private Document doc;
public DocumentFieldValueProvider(Document doc) {
this.doc = doc;
public DocumentFieldValueProvider(
LeafReaderContext currentContext, int docId, Set<String> preloadFields) {
docSupplier = () -> currentContext.reader().document(docId, preloadFields);
}
@Override
public List<CharSequence> getValues(String field) {
public List<CharSequence> getValues(String field) throws IOException {
if (doc == null) {
doc = Objects.requireNonNull(docSupplier.get());
}
return Arrays.asList(doc.getValues(field));
}
}

View File

@ -19,29 +19,53 @@ package org.apache.lucene.search.matchhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.MatchesIterator;
/** This strategy retrieves offsets directly from {@link MatchesIterator}. */
/**
* This strategy retrieves offsets directly from {@link MatchesIterator}, if they are available,
* otherwise it falls back to using {@link OffsetsFromPositions}.
*/
public final class OffsetsFromMatchIterator implements OffsetsRetrievalStrategy {
private final String field;
private final OffsetsFromPositions noOffsetsFallback;
OffsetsFromMatchIterator(String field) {
OffsetsFromMatchIterator(String field, OffsetsFromPositions noOffsetsFallback) {
this.field = field;
this.noOffsetsFallback = Objects.requireNonNull(noOffsetsFallback);
}
@Override
public List<OffsetRange> get(
MatchesIterator matchesIterator, MatchRegionRetriever.FieldValueProvider doc)
throws IOException {
ArrayList<OffsetRange> ranges = new ArrayList<>();
ArrayList<OffsetRange> positionRanges = new ArrayList<>();
ArrayList<OffsetRange> offsetRanges = new ArrayList<>();
while (matchesIterator.next()) {
int from = matchesIterator.startOffset();
int to = matchesIterator.endOffset();
if (from < 0 || to < 0) {
throw new IOException("Matches API returned negative offsets for field: " + field);
int fromPosition = matchesIterator.startPosition();
int toPosition = matchesIterator.endPosition();
if (fromPosition < 0 || toPosition < 0) {
throw new IOException("Matches API returned negative positions for field: " + field);
}
positionRanges.add(new OffsetRange(fromPosition, toPosition));
if (offsetRanges != null) {
int from = matchesIterator.startOffset();
int to = matchesIterator.endOffset();
if (from < 0 || to < 0) {
// At least one offset isn't available. Fallback to just positions.
offsetRanges = null;
} else {
offsetRanges.add(new OffsetRange(from, to));
}
}
ranges.add(new OffsetRange(from, to));
}
return ranges;
// Use the fallback conversion from positions if not all offsets were available.
if (offsetRanges == null) {
return noOffsetsFallback.convertPositionsToOffsets(positionRanges, doc.getValues(field));
} else {
return offsetRanges;
}
}
}

View File

@ -56,20 +56,11 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
}
// Convert from positions to offsets.
return convertPositionsToOffsets(positionRanges, analyzer, field, doc.getValues(field));
return convertPositionsToOffsets(positionRanges, doc.getValues(field));
}
@Override
public boolean requiresDocument() {
return true;
}
private static List<OffsetRange> convertPositionsToOffsets(
ArrayList<OffsetRange> positionRanges,
Analyzer analyzer,
String fieldName,
List<CharSequence> values)
throws IOException {
List<OffsetRange> convertPositionsToOffsets(
ArrayList<OffsetRange> positionRanges, List<CharSequence> values) throws IOException {
if (positionRanges.isEmpty()) {
return positionRanges;
@ -106,7 +97,7 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
final String value = values.get(valueIndex).toString();
final boolean lastValue = valueIndex + 1 == max;
TokenStream ts = analyzer.tokenStream(fieldName, value);
TokenStream ts = analyzer.tokenStream(field, value);
OffsetAttribute offsetAttr = ts.getAttribute(OffsetAttribute.class);
PositionIncrementAttribute posAttr = ts.getAttribute(PositionIncrementAttribute.class);
ts.reset();
@ -144,8 +135,8 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
}
}
ts.end();
position += posAttr.getPositionIncrement() + analyzer.getPositionIncrementGap(fieldName);
valueOffset += offsetAttr.endOffset() + analyzer.getOffsetGap(fieldName);
position += posAttr.getPositionIncrement() + analyzer.getPositionIncrementGap(field);
valueOffset += offsetAttr.endOffset() + analyzer.getOffsetGap(field);
ts.close();
}

View File

@ -91,9 +91,4 @@ public final class OffsetsFromTokens implements OffsetsRetrievalStrategy {
}
return ranges;
}
@Override
public boolean requiresDocument() {
return true;
}
}

View File

@ -66,9 +66,4 @@ public final class OffsetsFromValues implements OffsetsRetrievalStrategy {
}
return ranges;
}
@Override
public boolean requiresDocument() {
return true;
}
}

View File

@ -30,9 +30,4 @@ public interface OffsetsRetrievalStrategy {
List<OffsetRange> get(
MatchesIterator matchesIterator, MatchRegionRetriever.FieldValueProvider doc)
throws IOException;
/** Whether this strategy requires document field access. */
default boolean requiresDocument() {
return false;
}
}

View File

@ -337,179 +337,163 @@ public class TestMatchHighlighter extends LuceneTestCase {
String inputDocument = "The quick brown fox jumps over the lazy dog";
List<String[]> queryResultPairs =
new ArrayList<>(
Arrays.asList(
new String[][] {
{
"fn:ordered(brown dog)",
"0. %s: The quick >brown fox jumps over the lazy dog<"
},
{
"fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:containedBy(fox fn:ordered(brown fox dog))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 quick fox \"furry dog\")",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:or(quick \"fox\")",
"0. %s: The >quick< brown >fox< jumps over the lazy dog"
},
{"fn:or(\"quick fox\")"},
{
"fn:phrase(quick brown fox)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
{"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
{
"fn:phrase(fn:ordered(quick fox) jumps)",
"0. %s: The >quick brown fox jumps< over the lazy dog"
},
{
"fn:ordered(quick jumps dog)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:ordered(quick fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:ordered(quick jumps fn:or(fox dog))",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(dog jumps quick)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(fn:or(fox dog) quick)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox jumps< over the lazy dog"
},
{"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{
"fn:before(fn:or(brown lazy) fox)",
"0. %s: The quick >brown< fox jumps over the lazy dog"
},
{
"fn:before(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick >brown< fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fox)",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy >dog<"
},
{
"fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containing(fn:atLeast(2 quick fox dog) jumps)",
"0. %s: The quick brown >fox jumps over the lazy dog<"
},
{
"fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
"0. %s: The quick brown fox jumps over >the lazy dog<"
},
{
"fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
},
}));
// TODO: LUCENE-10229: The test cases below do not work for fields enabled with offset yet:
// mainly "extend".
if (field.equals(FLD_TEXT2)) {
queryResultPairs.addAll(
Arrays.asList(
new String[][] {
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
}
}));
}
Arrays.asList(
new String[][] {
{"fn:ordered(brown dog)", "0. %s: The quick >brown fox jumps over the lazy dog<"},
{
"fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:containedBy(fox fn:ordered(brown fox dog))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 quick fox \"furry dog\")",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{
"fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
},
{
"fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
"0. %s: The >quick brown fox< jumps over the >lazy dog<"
},
{"fn:or(quick \"fox\")", "0. %s: The >quick< brown >fox< jumps over the lazy dog"},
{"fn:or(\"quick fox\")"},
{
"fn:phrase(quick brown fox)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
{"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
{"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
{"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
{
"fn:phrase(fn:ordered(quick fox) jumps)",
"0. %s: The >quick brown fox jumps< over the lazy dog"
},
{
"fn:ordered(quick jumps dog)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:ordered(quick fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:ordered(quick jumps fn:or(fox dog))",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(dog jumps quick)",
"0. %s: The >quick brown fox jumps over the lazy dog<"
},
{
"fn:unordered(fn:or(fox dog) quick)",
"0. %s: The >quick brown fox< jumps over the lazy dog"
},
{
"fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox jumps< over the lazy dog"
},
{"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
{
"fn:before(fn:or(brown lazy) fox)",
"0. %s: The quick >brown< fox jumps over the lazy dog"
},
{
"fn:before(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick >brown< fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fox)",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:after(fn:or(brown lazy) fn:or(dog fox))",
"0. %s: The quick brown fox jumps over the >lazy< dog"
},
{
"fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy >dog<"
},
{
"fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:containing(fn:atLeast(2 quick fox dog) jumps)",
"0. %s: The quick brown >fox jumps over the lazy dog<"
},
{
"fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
"0. %s: The quick brown fox jumps over >the lazy dog<"
},
{
"fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
"0. %s: The quick >brown fox< jumps over the lazy dog"
},
{
"fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
"0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
},
{"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
{
"fn:extend(fn:or(dog fox) 2 0)",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown fox jumps over the lazy >dog<"
},
{
"fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
"0. %s: The quick brown >fox< jumps over the lazy dog"
},
{
"fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
"0. %s: The >quick brown fox< jumps over >the lazy dog<"
},
{
"fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
"0. %s: The quick brown fox jumps over the >lazy dog<"
}
});
// Verify assertions.
new IndexBuilder(this::toField)

View File

@ -60,7 +60,11 @@ class ConjunctionMatchesIterator implements IntervalMatchesIterator {
public int startOffset() throws IOException {
int start = Integer.MAX_VALUE;
for (MatchesIterator s : subs) {
start = Math.min(start, s.startOffset());
int v = s.startOffset();
if (v == -1) {
return -1;
}
start = Math.min(start, v);
}
return start;
}
@ -69,7 +73,11 @@ class ConjunctionMatchesIterator implements IntervalMatchesIterator {
public int endOffset() throws IOException {
int end = -1;
for (MatchesIterator s : subs) {
end = Math.max(end, s.endOffset());
int v = s.endOffset();
if (v == -1) {
return -1;
}
end = Math.max(end, v);
}
return end;
}

View File

@ -23,6 +23,8 @@ import java.util.Collections;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
class ExtendedIntervalsSource extends IntervalsSource {
@ -53,9 +55,66 @@ class ExtendedIntervalsSource extends IntervalsSource {
if (in == null) {
return null;
}
IntervalMatchesIterator inNoOffsets =
new IntervalMatchesIterator() {
IntervalMatchesIterator delegate = in;
@Override
public int gaps() {
return delegate.gaps();
}
@Override
public int width() {
return delegate.width();
}
@Override
public boolean next() throws IOException {
return delegate.next();
}
@Override
public int startPosition() {
return delegate.startPosition();
}
@Override
public int endPosition() {
return delegate.endPosition();
}
@Override
public int startOffset() throws IOException {
// We could return this:
// before > 0 ? -1 : in.startOffset();
// but keep it consistent for start/end offset:
return -1;
}
@Override
public int endOffset() throws IOException {
// Offsets are reported as unavailable (-1) for the extended interval.
// We could return this:
// after > 0 ? -1 : in.endOffset();
// but keep it consistent for start/end offset:
return -1;
}
@Override
public MatchesIterator getSubMatches() throws IOException {
return delegate.getSubMatches();
}
@Override
public Query getQuery() {
return delegate.getQuery();
}
};
IntervalIterator wrapped =
new ExtendedIntervalIterator(IntervalMatches.wrapMatches(in, doc), before, after);
return IntervalMatches.asMatches(wrapped, in, doc);
new ExtendedIntervalIterator(IntervalMatches.wrapMatches(inNoOffsets, doc), before, after);
return IntervalMatches.asMatches(wrapped, inNoOffsets, doc);
}
@Override

View File

@ -69,7 +69,8 @@ public class TestIntervals extends LuceneTestCase {
"Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the fraggle",
"Nor here, nowt hot going on in pease this one",
"Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. Some like it hot, some like it twelve",
"Porridge is great"
"Porridge is great",
""
};
// 0 1 2 3 4 5 6 7 8 9
@ -81,7 +82,8 @@ public class TestIntervals extends LuceneTestCase {
"a b a c b a b c",
"So thrice five miles of fertile ground",
"Pease hot porridge porridge",
"w1 w2 w3 w4 w1 w6 w3 w8 w4 w7 w1 w6"
"w1 w2 w3 w4 w1 w6 w3 w8 w4 w7 w1 w6",
"the quick brown fox jumps over the lazy dog"
};
private static Directory directory;
@ -119,8 +121,13 @@ public class TestIntervals extends LuceneTestCase {
IOUtils.close(searcher.getIndexReader(), directory);
}
/**
* @param expectedIntervals An array of expected position intervals for each document. Each
* sub-array contains a number of pairs corresponding to (start, end) positions of each
* interval.
*/
private void checkIntervals(
IntervalsSource source, String field, int expectedMatchCount, int[][] expected)
IntervalsSource source, String field, int expectedMatchCount, int[][] expectedIntervals)
throws IOException {
int matchedDocs = 0;
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
@ -140,13 +147,17 @@ public class TestIntervals extends LuceneTestCase {
assertEquals(-1, intervals.start());
assertEquals(-1, intervals.end());
while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) {
if (i >= expected[id].length) {
if (i >= expectedIntervals[id].length) {
fail("Unexpected match in doc " + id + ": " + intervals);
}
assertEquals(source + ": wrong start value in doc " + id, expected[id][i], pos);
assertEquals(
"start() != pos returned from nextInterval()", expected[id][i], intervals.start());
assertEquals("Wrong end value in doc " + id, expected[id][i + 1], intervals.end());
source + ": wrong start value in doc " + id, expectedIntervals[id][i], pos);
assertEquals(
"start() != pos returned from nextInterval()",
expectedIntervals[id][i],
intervals.start());
assertEquals(
"Wrong end value in doc " + id, expectedIntervals[id][i + 1], intervals.end());
i += 2;
assertTrue(mi.next());
assertEquals(
@ -158,7 +169,8 @@ public class TestIntervals extends LuceneTestCase {
intervals.end(),
mi.endPosition());
}
assertEquals(source + ": wrong number of endpoints in doc " + id, expected[id].length, i);
assertEquals(
source + ": wrong number of endpoints in doc " + id, expectedIntervals[id].length, i);
assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.start());
assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.end());
if (i > 0) {
@ -168,7 +180,11 @@ public class TestIntervals extends LuceneTestCase {
assertNull("Expected null matches iterator on doc " + id, mi);
}
} else {
assertEquals(0, expected[id].length);
// Assume no expected intervals for this doc if the expectedIntervals array
// is shorter than the number of docs.
if (expectedIntervals.length > id) {
assertEquals(0, expectedIntervals[id].length);
}
assertNull(mi);
}
}
@ -220,10 +236,10 @@ public class TestIntervals extends LuceneTestCase {
private void assertMatch(MatchesIterator mi, int start, int end, int startOffset, int endOffset)
throws IOException {
assertTrue(mi.next());
assertEquals(start, mi.startPosition());
assertEquals(end, mi.endPosition());
assertEquals(startOffset, mi.startOffset());
assertEquals(endOffset, mi.endOffset());
assertEquals("Start position", start, mi.startPosition());
assertEquals("End position", end, mi.endPosition());
assertEquals("Start offset", startOffset, mi.startOffset());
assertEquals("End offset", endOffset, mi.endOffset());
}
private void assertGaps(IntervalsSource source, int doc, String field, int[] expectedGaps)
@ -253,6 +269,17 @@ public class TestIntervals extends LuceneTestCase {
e.getMessage());
}
/**
 * Checks that an extended term interval reports the widened position range and reports its
 * offsets as unavailable (-1).
 */
public void testExtends() throws IOException {
// Extend matches of "fox" by 1 position before and 2 positions after.
IntervalsSource source = Intervals.extend(Intervals.term("fox"), 1, 2);
// Only doc 6 is expected to match, with the single position interval [2, 5].
checkIntervals(source, "field2", 1, new int[][] {{}, {}, {}, {}, {}, {}, {2, 5}});
MatchesIterator mi = getMatches(source, 6, "field2");
// LUCENE-10229: we can't report offsets for the "extended" position range because this
// information is not available from the term positions index alone. Report the truth
// (-1: not available).
assertMatch(mi, 2, 5, -1, -1);
}
public void testTermQueryIntervals() throws IOException {
IntervalsSource source = Intervals.term("porridge");
checkIntervals(
@ -840,11 +867,11 @@ public class TestIntervals extends LuceneTestCase {
assertEquals(5, source.minExtent());
MatchesIterator mi = getMatches(source, 1, "field1");
assertMatch(mi, 3, 7, 20, 55);
assertMatch(mi, 3, 7, -1, -1);
MatchesIterator sub = mi.getSubMatches();
assertNotNull(sub);
assertMatch(sub, 3, 3, 20, 25);
assertMatch(sub, 4, 6, 35, 39);
assertMatch(sub, 4, 6, -1, -1);
assertMatch(sub, 7, 7, 47, 55);
source = Intervals.extend(Intervals.term("w1"), 5, Integer.MAX_VALUE);