mirror of https://github.com/apache/lucene.git
LUCENE-10229: return -1 for unknown offsets in ExtendedIntervalsSource. Modify highlighting to work properly with or without offsets (depending on their availability). (#803)
Thanks @romseygeek
parent ba1062620c
commit 2c1f938139
@@ -84,6 +84,9 @@ New Features
 Improvements
 ---------------------

+* LUCENE-10229: return -1 for unknown offsets in ExtendedIntervalsSource. Modify highlighting to
+  work properly with or without offsets. (Dawid Weiss)
+
 * LUCENE-10494: Implement method to bulk add all collection elements to a PriorityQueue.
   (Bauyrzhan Sakhariyev)

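For context before the code changes below: the gist of the patch is that the Matches API may now legitimately report -1 for start/end offsets ("offsets unknown"), and highlighting consumers are expected to fall back to token positions instead of failing. The following is a minimal, self-contained sketch of that consumer-side contract, not code from the patch; it only calls MatchesIterator methods that appear in the diff, and the Range record and collect() helper are hypothetical names used for illustration.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.MatchesIterator;

class OffsetsOrPositions {
  /** Hypothetical value type for this sketch; the patch itself uses OffsetRange. */
  record Range(int from, int to) {}

  /** Collect offset ranges when every match reports them, otherwise fall back to positions. */
  static List<Range> collect(MatchesIterator mi) throws IOException {
    List<Range> positions = new ArrayList<>();
    List<Range> offsets = new ArrayList<>();
    while (mi.next()) {
      positions.add(new Range(mi.startPosition(), mi.endPosition()));
      if (offsets != null) {
        int from = mi.startOffset();
        int to = mi.endOffset();
        if (from < 0 || to < 0) {
          // -1 now means "offset not available" rather than being an error.
          offsets = null;
        } else {
          offsets.add(new Range(from, to));
        }
      }
    }
    return offsets != null ? offsets : positions;
  }
}
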
@@ -23,6 +23,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.PrimitiveIterator;
 import java.util.Set;
 import java.util.TreeMap;
@@ -43,6 +44,7 @@ import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.IOSupplier;

 /**
  * Utility class to compute a list of "match regions" for a given query, searcher and document(s)
@@ -75,7 +77,7 @@ public class MatchRegionRetriever {
    */
   @FunctionalInterface
   public interface FieldValueProvider {
-    List<CharSequence> getValues(String field);
+    List<CharSequence> getValues(String field) throws IOException;
   }

   /**
@@ -131,9 +133,7 @@ public class MatchRegionRetriever {
     preloadFields = new HashSet<>();
     offsetStrategies.forEach(
         (field, strategy) -> {
-          if (strategy.requiresDocument()) {
-            preloadFields.add(field);
-          }
+          preloadFields.add(field);
         });

     // Only preload those field values that can be affected by the query and are required
@@ -181,17 +181,12 @@ public class MatchRegionRetriever {
       int contextRelativeDocId = docId - currentContext.docBase;

       // Only preload fields we may potentially need.
-      FieldValueProvider documentSupplier;
-      if (preloadFields.isEmpty()) {
-        documentSupplier = null;
-      } else {
-        Document doc = currentContext.reader().document(contextRelativeDocId, preloadFields);
-        documentSupplier = new DocumentFieldValueProvider(doc);
-      }
+      FieldValueProvider docFieldsSupplier =
+          new DocumentFieldValueProvider(currentContext, contextRelativeDocId, preloadFields);

       highlights.clear();
       highlightDocument(
-          currentContext, contextRelativeDocId, documentSupplier, (field) -> true, highlights);
+          currentContext, contextRelativeDocId, docFieldsSupplier, (field) -> true, highlights);
       consumer.accept(docId, currentContext.reader(), contextRelativeDocId, highlights);
     }
   }
@@ -262,7 +257,7 @@ public class MatchRegionRetriever {

     switch (fieldInfo.getIndexOptions()) {
       case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
-        return new OffsetsFromMatchIterator(field);
+        return new OffsetsFromMatchIterator(field, new OffsetsFromPositions(field, analyzer));

       case DOCS_AND_FREQS_AND_POSITIONS:
         return new OffsetsFromPositions(field, analyzer);
@@ -293,14 +288,20 @@ public class MatchRegionRetriever {

   /** Implements {@link FieldValueProvider} wrapping a preloaded {@link Document}. */
   private static final class DocumentFieldValueProvider implements FieldValueProvider {
-    private final Document doc;
+    private final IOSupplier<Document> docSupplier;
+    private Document doc;

-    public DocumentFieldValueProvider(Document doc) {
-      this.doc = doc;
+    public DocumentFieldValueProvider(
+        LeafReaderContext currentContext, int docId, Set<String> preloadFields) {
+      docSupplier = () -> currentContext.reader().document(docId, preloadFields);
     }

     @Override
-    public List<CharSequence> getValues(String field) {
+    public List<CharSequence> getValues(String field) throws IOException {
+      if (doc == null) {
+        doc = Objects.requireNonNull(docSupplier.get());
+      }
+
       return Arrays.asList(doc.getValues(field));
     }
   }

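A side note on the DocumentFieldValueProvider rewrite above: instead of eagerly loading the stored document for every hit, it now holds an IOSupplier and fetches the document only on the first getValues() call. The memoization pattern in isolation looks roughly like the standalone sketch below; the class and its names are hypothetical and a local IOSupplier interface is declared so the sketch compiles on its own (the patch uses org.apache.lucene.util.IOSupplier).

import java.io.IOException;

final class LazyIO<T> {
  /** Local stand-in for an IO-throwing supplier, to keep the sketch self-contained. */
  interface IOSupplier<V> {
    V get() throws IOException;
  }

  private final IOSupplier<T> supplier;
  private T value;

  LazyIO(IOSupplier<T> supplier) {
    this.supplier = supplier;
  }

  T get() throws IOException {
    if (value == null) {
      // Loaded at most once, and only if a caller actually asks for it.
      value = supplier.get();
    }
    return value;
  }
}
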
@@ -19,29 +19,53 @@ package org.apache.lucene.search.matchhighlight;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import org.apache.lucene.search.MatchesIterator;

-/** This strategy retrieves offsets directly from {@link MatchesIterator}. */
+/**
+ * This strategy retrieves offsets directly from {@link MatchesIterator}, if they are available,
+ * otherwise it falls back to using {@link OffsetsFromPositions}.
+ */
 public final class OffsetsFromMatchIterator implements OffsetsRetrievalStrategy {
   private final String field;
+  private final OffsetsFromPositions noOffsetsFallback;

-  OffsetsFromMatchIterator(String field) {
+  OffsetsFromMatchIterator(String field, OffsetsFromPositions noOffsetsFallback) {
     this.field = field;
+    this.noOffsetsFallback = Objects.requireNonNull(noOffsetsFallback);
   }

   @Override
   public List<OffsetRange> get(
       MatchesIterator matchesIterator, MatchRegionRetriever.FieldValueProvider doc)
       throws IOException {
-    ArrayList<OffsetRange> ranges = new ArrayList<>();
+    ArrayList<OffsetRange> positionRanges = new ArrayList<>();
+    ArrayList<OffsetRange> offsetRanges = new ArrayList<>();
     while (matchesIterator.next()) {
-      int from = matchesIterator.startOffset();
-      int to = matchesIterator.endOffset();
-      if (from < 0 || to < 0) {
-        throw new IOException("Matches API returned negative offsets for field: " + field);
+      int fromPosition = matchesIterator.startPosition();
+      int toPosition = matchesIterator.endPosition();
+      if (fromPosition < 0 || toPosition < 0) {
+        throw new IOException("Matches API returned negative positions for field: " + field);
       }
-      ranges.add(new OffsetRange(from, to));
+      positionRanges.add(new OffsetRange(fromPosition, toPosition));
+
+      if (offsetRanges != null) {
+        int from = matchesIterator.startOffset();
+        int to = matchesIterator.endOffset();
+        if (from < 0 || to < 0) {
+          // At least one offset isn't available. Fallback to just positions.
+          offsetRanges = null;
+        } else {
+          offsetRanges.add(new OffsetRange(from, to));
+        }
+      }
     }
-    return ranges;
+
+    // Use the fallback conversion from positions if not all offsets were available.
+    if (offsetRanges == null) {
+      return noOffsetsFallback.convertPositionsToOffsets(positionRanges, doc.getValues(field));
+    } else {
+      return offsetRanges;
+    }
   }
 }

@@ -56,20 +56,11 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
     }

     // Convert from positions to offsets.
-    return convertPositionsToOffsets(positionRanges, analyzer, field, doc.getValues(field));
+    return convertPositionsToOffsets(positionRanges, doc.getValues(field));
   }

-  @Override
-  public boolean requiresDocument() {
-    return true;
-  }
-
-  private static List<OffsetRange> convertPositionsToOffsets(
-      ArrayList<OffsetRange> positionRanges,
-      Analyzer analyzer,
-      String fieldName,
-      List<CharSequence> values)
-      throws IOException {
+  List<OffsetRange> convertPositionsToOffsets(
+      ArrayList<OffsetRange> positionRanges, List<CharSequence> values) throws IOException {

     if (positionRanges.isEmpty()) {
       return positionRanges;
@@ -106,7 +97,7 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
       final String value = values.get(valueIndex).toString();
       final boolean lastValue = valueIndex + 1 == max;

-      TokenStream ts = analyzer.tokenStream(fieldName, value);
+      TokenStream ts = analyzer.tokenStream(field, value);
       OffsetAttribute offsetAttr = ts.getAttribute(OffsetAttribute.class);
       PositionIncrementAttribute posAttr = ts.getAttribute(PositionIncrementAttribute.class);
       ts.reset();
@@ -144,8 +135,8 @@ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy {
         }
       }
       ts.end();
-      position += posAttr.getPositionIncrement() + analyzer.getPositionIncrementGap(fieldName);
-      valueOffset += offsetAttr.endOffset() + analyzer.getOffsetGap(fieldName);
+      position += posAttr.getPositionIncrement() + analyzer.getPositionIncrementGap(field);
+      valueOffset += offsetAttr.endOffset() + analyzer.getOffsetGap(field);
       ts.close();
     }

@@ -91,9 +91,4 @@ public final class OffsetsFromTokens implements OffsetsRetrievalStrategy {
     }
     return ranges;
   }
-
-  @Override
-  public boolean requiresDocument() {
-    return true;
-  }
 }

@@ -66,9 +66,4 @@ public final class OffsetsFromValues implements OffsetsRetrievalStrategy {
     }
     return ranges;
   }
-
-  @Override
-  public boolean requiresDocument() {
-    return true;
-  }
 }

@@ -30,9 +30,4 @@ public interface OffsetsRetrievalStrategy {
   List<OffsetRange> get(
       MatchesIterator matchesIterator, MatchRegionRetriever.FieldValueProvider doc)
       throws IOException;
-
-  /** Whether this strategy requires document field access. */
-  default boolean requiresDocument() {
-    return false;
-  }
 }

@@ -337,179 +337,163 @@ public class TestMatchHighlighter extends LuceneTestCase {
     String inputDocument = "The quick brown fox jumps over the lazy dog";

     List<String[]> queryResultPairs =
-        new ArrayList<>(
-            Arrays.asList(
-                new String[][] {
-                  {
-                    "fn:ordered(brown dog)",
-                    "0. %s: The quick >brown fox jumps over the lazy dog<"
-                  },
-                  {
-                    "fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
-                    "0. %s: The quick brown fox jumps over the >lazy< dog"
-                  },
-                  {
-                    "fn:containedBy(fox fn:ordered(brown fox dog))",
-                    "0. %s: The quick brown >fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:atLeast(2 quick fox \"furry dog\")",
-                    "0. %s: The >quick brown fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
-                    "0. %s: The quick brown fox jumps over the >lazy dog<"
-                  },
-                  {
-                    "fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
-                    "0. %s: The >quick brown fox< jumps over the >lazy dog<"
-                  },
-                  {
-                    "fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
-                    "0. %s: The quick brown fox jumps over the >lazy dog<"
-                  },
-                  {
-                    "fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
-                    "0. %s: The >quick brown fox< jumps over the >lazy dog<"
-                  },
-                  {
-                    "fn:or(quick \"fox\")",
-                    "0. %s: The >quick< brown >fox< jumps over the lazy dog"
-                  },
-                  {"fn:or(\"quick fox\")"},
-                  {
-                    "fn:phrase(quick brown fox)",
-                    "0. %s: The >quick brown fox< jumps over the lazy dog"
-                  },
-                  {"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
-                  {"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
-                  {"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
-                  {"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
-                  {
-                    "fn:phrase(fn:ordered(quick fox) jumps)",
-                    "0. %s: The >quick brown fox jumps< over the lazy dog"
-                  },
-                  {
-                    "fn:ordered(quick jumps dog)",
-                    "0. %s: The >quick brown fox jumps over the lazy dog<"
-                  },
-                  {
-                    "fn:ordered(quick fn:or(fox dog))",
-                    "0. %s: The >quick brown fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:ordered(quick jumps fn:or(fox dog))",
-                    "0. %s: The >quick brown fox jumps over the lazy dog<"
-                  },
-                  {
-                    "fn:unordered(dog jumps quick)",
-                    "0. %s: The >quick brown fox jumps over the lazy dog<"
-                  },
-                  {
-                    "fn:unordered(fn:or(fox dog) quick)",
-                    "0. %s: The >quick brown fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
-                    "0. %s: The quick >brown fox jumps< over the lazy dog"
-                  },
-                  {"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
-                  {"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
-                  {
-                    "fn:before(fn:or(brown lazy) fox)",
-                    "0. %s: The quick >brown< fox jumps over the lazy dog"
-                  },
-                  {
-                    "fn:before(fn:or(brown lazy) fn:or(dog fox))",
-                    "0. %s: The quick >brown< fox jumps over the >lazy< dog"
-                  },
-                  {
-                    "fn:after(fn:or(brown lazy) fox)",
-                    "0. %s: The quick brown fox jumps over the >lazy< dog"
-                  },
-                  {
-                    "fn:after(fn:or(brown lazy) fn:or(dog fox))",
-                    "0. %s: The quick brown fox jumps over the >lazy< dog"
-                  },
-                  {
-                    "fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
-                    "0. %s: The quick brown fox jumps over the lazy >dog<"
-                  },
-                  {
-                    "fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
-                    "0. %s: The quick brown >fox< jumps over the lazy >dog<"
-                  },
-                  {
-                    "fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
-                    "0. %s: The quick brown >fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
-                    "0. %s: The quick brown >fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
-                    "0. %s: The quick brown fox jumps over the lazy >dog<"
-                  },
-                  {
-                    "fn:containing(fn:atLeast(2 quick fox dog) jumps)",
-                    "0. %s: The quick brown >fox jumps over the lazy dog<"
-                  },
-                  {
-                    "fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
-                    "0. %s: The quick brown fox jumps over >the lazy dog<"
-                  },
-                  {
-                    "fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
-                    "0. %s: The quick >brown fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
-                    "0. %s: The quick brown fox jumps over the lazy >dog<"
-                  },
-                  {
-                    "fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
-                    "0. %s: The quick >brown fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
-                    "0. %s: The quick brown >fox< jumps over the lazy dog"
-                  },
-                  {
-                    "fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
-                    "0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
-                  },
-                }));
-
-    // TODO: LUCENE-10229: The test cases below do not work for fields enabled with offset yet:
-    // mainly "extend".
-    if (field.equals(FLD_TEXT2)) {
-      queryResultPairs.addAll(
-          Arrays.asList(
-              new String[][] {
-                {"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
-                {
-                  "fn:extend(fn:or(dog fox) 2 0)",
-                  "0. %s: The >quick brown fox< jumps over >the lazy dog<"
-                },
-                {
-                  "fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
-                  "0. %s: The quick brown fox jumps over the lazy >dog<"
-                },
-                {
-                  "fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
-                  "0. %s: The quick brown >fox< jumps over the lazy dog"
-                },
-                {
-                  "fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
-                  "0. %s: The >quick brown fox< jumps over >the lazy dog<"
-                },
-                {
-                  "fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
-                  "0. %s: The quick brown fox jumps over the >lazy dog<"
-                }
-              }));
-    }
+        Arrays.asList(
+            new String[][] {
+              {"fn:ordered(brown dog)", "0. %s: The quick >brown fox jumps over the lazy dog<"},
+              {
+                "fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
+                "0. %s: The quick brown fox jumps over the >lazy< dog"
+              },
+              {
+                "fn:containedBy(fox fn:ordered(brown fox dog))",
+                "0. %s: The quick brown >fox< jumps over the lazy dog"
+              },
+              {
+                "fn:atLeast(2 quick fox \"furry dog\")",
+                "0. %s: The >quick brown fox< jumps over the lazy dog"
+              },
+              {
+                "fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
+                "0. %s: The quick brown fox jumps over the >lazy dog<"
+              },
+              {
+                "fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
+                "0. %s: The >quick brown fox< jumps over the >lazy dog<"
+              },
+              {
+                "fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
+                "0. %s: The quick brown fox jumps over the >lazy dog<"
+              },
+              {
+                "fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
+                "0. %s: The >quick brown fox< jumps over the >lazy dog<"
+              },
+              {"fn:or(quick \"fox\")", "0. %s: The >quick< brown >fox< jumps over the lazy dog"},
+              {"fn:or(\"quick fox\")"},
+              {
+                "fn:phrase(quick brown fox)",
+                "0. %s: The >quick brown fox< jumps over the lazy dog"
+              },
+              {"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
+              {"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
+              {"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
+              {"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
+              {
+                "fn:phrase(fn:ordered(quick fox) jumps)",
+                "0. %s: The >quick brown fox jumps< over the lazy dog"
+              },
+              {
+                "fn:ordered(quick jumps dog)",
+                "0. %s: The >quick brown fox jumps over the lazy dog<"
+              },
+              {
+                "fn:ordered(quick fn:or(fox dog))",
+                "0. %s: The >quick brown fox< jumps over the lazy dog"
+              },
+              {
+                "fn:ordered(quick jumps fn:or(fox dog))",
+                "0. %s: The >quick brown fox jumps over the lazy dog<"
+              },
+              {
+                "fn:unordered(dog jumps quick)",
+                "0. %s: The >quick brown fox jumps over the lazy dog<"
+              },
+              {
+                "fn:unordered(fn:or(fox dog) quick)",
+                "0. %s: The >quick brown fox< jumps over the lazy dog"
+              },
+              {
+                "fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
+                "0. %s: The quick >brown fox jumps< over the lazy dog"
+              },
+              {"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
+              {"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
+              {
+                "fn:before(fn:or(brown lazy) fox)",
+                "0. %s: The quick >brown< fox jumps over the lazy dog"
+              },
+              {
+                "fn:before(fn:or(brown lazy) fn:or(dog fox))",
+                "0. %s: The quick >brown< fox jumps over the >lazy< dog"
+              },
+              {
+                "fn:after(fn:or(brown lazy) fox)",
+                "0. %s: The quick brown fox jumps over the >lazy< dog"
+              },
+              {
+                "fn:after(fn:or(brown lazy) fn:or(dog fox))",
+                "0. %s: The quick brown fox jumps over the >lazy< dog"
+              },
+              {
+                "fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
+                "0. %s: The quick brown fox jumps over the lazy >dog<"
+              },
+              {
+                "fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
+                "0. %s: The quick brown >fox< jumps over the lazy >dog<"
+              },
+              {
+                "fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
+                "0. %s: The quick brown >fox< jumps over the lazy dog"
+              },
+              {
+                "fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
+                "0. %s: The quick brown >fox< jumps over the lazy dog"
+              },
+              {
+                "fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
+                "0. %s: The quick brown fox jumps over the lazy >dog<"
+              },
+              {
+                "fn:containing(fn:atLeast(2 quick fox dog) jumps)",
+                "0. %s: The quick brown >fox jumps over the lazy dog<"
+              },
+              {
+                "fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
+                "0. %s: The quick brown fox jumps over >the lazy dog<"
+              },
+              {
+                "fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
+                "0. %s: The quick >brown fox< jumps over the lazy dog"
+              },
+              {
+                "fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
+                "0. %s: The quick brown fox jumps over the lazy >dog<"
+              },
+              {
+                "fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
+                "0. %s: The quick >brown fox< jumps over the lazy dog"
+              },
+              {
+                "fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
+                "0. %s: The quick brown >fox< jumps over the lazy dog"
+              },
+              {
+                "fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
+                "0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
+              },
+              {"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
+              {
+                "fn:extend(fn:or(dog fox) 2 0)",
+                "0. %s: The >quick brown fox< jumps over >the lazy dog<"
+              },
+              {
+                "fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
+                "0. %s: The quick brown fox jumps over the lazy >dog<"
+              },
+              {
+                "fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
+                "0. %s: The quick brown >fox< jumps over the lazy dog"
+              },
+              {
+                "fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
+                "0. %s: The >quick brown fox< jumps over >the lazy dog<"
+              },
+              {
+                "fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
+                "0. %s: The quick brown fox jumps over the >lazy dog<"
+              }
+            });

     // Verify assertions.
     new IndexBuilder(this::toField)

@@ -60,7 +60,11 @@ class ConjunctionMatchesIterator implements IntervalMatchesIterator {
   public int startOffset() throws IOException {
     int start = Integer.MAX_VALUE;
     for (MatchesIterator s : subs) {
-      start = Math.min(start, s.startOffset());
+      int v = s.startOffset();
+      if (v == -1) {
+        return -1;
+      }
+      start = Math.min(start, v);
     }
     return start;
   }
@@ -69,7 +73,11 @@ class ConjunctionMatchesIterator implements IntervalMatchesIterator {
   public int endOffset() throws IOException {
     int end = -1;
     for (MatchesIterator s : subs) {
-      end = Math.max(end, s.endOffset());
+      int v = s.endOffset();
+      if (v == -1) {
+        return -1;
+      }
+      end = Math.max(end, v);
     }
     return end;
   }

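Why the two loops above need the early return: the conjunction's offset is the min/max over its sub-iterators, and folding a -1 ("offset unknown") directly into Math.min or Math.max would either fabricate a bogus offset or silently ignore the unknown one. A tiny standalone sketch of the sentinel-aware fold (hypothetical helper, not code from the patch):

final class OffsetFold {
  /** Smallest known start offset, or -1 if any sub-iterator does not know its offset. */
  static int minStartOffset(int[] subStartOffsets) {
    int start = Integer.MAX_VALUE;
    for (int v : subStartOffsets) {
      if (v == -1) {
        return -1; // one unknown offset makes the enclosing offset unknown
      }
      start = Math.min(start, v);
    }
    return start;
  }
}
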
@@ -23,6 +23,8 @@ import java.util.Collections;
 import java.util.Objects;
 import java.util.stream.Collectors;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;

 class ExtendedIntervalsSource extends IntervalsSource {
@@ -53,9 +55,66 @@ class ExtendedIntervalsSource extends IntervalsSource {
     if (in == null) {
       return null;
     }

+    IntervalMatchesIterator inNoOffsets =
+        new IntervalMatchesIterator() {
+          IntervalMatchesIterator delegate = in;
+
+          @Override
+          public int gaps() {
+            return delegate.gaps();
+          }
+
+          @Override
+          public int width() {
+            return delegate.width();
+          }
+
+          @Override
+          public boolean next() throws IOException {
+            return delegate.next();
+          }
+
+          @Override
+          public int startPosition() {
+            return delegate.startPosition();
+          }
+
+          @Override
+          public int endPosition() {
+            return delegate.endPosition();
+          }
+
+          @Override
+          public int startOffset() throws IOException {
+            // We could return this:
+            // before > 0 ? -1 : in.startOffset();
+            // but keep it consistent for start/end offset:
+            return -1;
+          }
+
+          @Override
+          public int endOffset() throws IOException {
+            // We could return this:
+            // after > 0 ? -1 : in.startOffset();
+            // but keep it consistent for start/end offset:
+            return -1;
+          }
+
+          @Override
+          public MatchesIterator getSubMatches() throws IOException {
+            return delegate.getSubMatches();
+          }
+
+          @Override
+          public Query getQuery() {
+            return delegate.getQuery();
+          }
+        };
+
     IntervalIterator wrapped =
-        new ExtendedIntervalIterator(IntervalMatches.wrapMatches(in, doc), before, after);
-    return IntervalMatches.asMatches(wrapped, in, doc);
+        new ExtendedIntervalIterator(IntervalMatches.wrapMatches(inNoOffsets, doc), before, after);
+    return IntervalMatches.asMatches(wrapped, inNoOffsets, doc);
   }

   @Override

@@ -69,7 +69,8 @@ public class TestIntervals extends LuceneTestCase {
         "Pease porridge cold, pease porridge hot, pease porridge in the pot twelve days old. Some like it cold, some like it hot, some like it in the fraggle",
         "Nor here, nowt hot going on in pease this one",
         "Pease porridge hot, pease porridge cold, pease porridge in the pot nine years old. Some like it hot, some like it twelve",
-        "Porridge is great"
+        "Porridge is great",
+        ""
       };

   // 0 1 2 3 4 5 6 7 8 9
@@ -81,7 +82,8 @@ public class TestIntervals extends LuceneTestCase {
         "a b a c b a b c",
         "So thrice five miles of fertile ground",
         "Pease hot porridge porridge",
-        "w1 w2 w3 w4 w1 w6 w3 w8 w4 w7 w1 w6"
+        "w1 w2 w3 w4 w1 w6 w3 w8 w4 w7 w1 w6",
+        "the quick brown fox jumps over the lazy dog"
       };

   private static Directory directory;
@@ -119,8 +121,13 @@ public class TestIntervals extends LuceneTestCase {
     IOUtils.close(searcher.getIndexReader(), directory);
   }

+  /**
+   * @param expectedIntervals An array of expected position intervals for each document. Each
+   *     sub-array contains a number of pairs corresponding to (start, end) positions of each
+   *     interval.
+   */
   private void checkIntervals(
-      IntervalsSource source, String field, int expectedMatchCount, int[][] expected)
+      IntervalsSource source, String field, int expectedMatchCount, int[][] expectedIntervals)
       throws IOException {
     int matchedDocs = 0;
     for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
@@ -140,13 +147,17 @@ public class TestIntervals extends LuceneTestCase {
         assertEquals(-1, intervals.start());
         assertEquals(-1, intervals.end());
         while ((pos = intervals.nextInterval()) != IntervalIterator.NO_MORE_INTERVALS) {
-          if (i >= expected[id].length) {
+          if (i >= expectedIntervals[id].length) {
             fail("Unexpected match in doc " + id + ": " + intervals);
           }
-          assertEquals(source + ": wrong start value in doc " + id, expected[id][i], pos);
           assertEquals(
-              "start() != pos returned from nextInterval()", expected[id][i], intervals.start());
-          assertEquals("Wrong end value in doc " + id, expected[id][i + 1], intervals.end());
+              source + ": wrong start value in doc " + id, expectedIntervals[id][i], pos);
+          assertEquals(
+              "start() != pos returned from nextInterval()",
+              expectedIntervals[id][i],
+              intervals.start());
+          assertEquals(
+              "Wrong end value in doc " + id, expectedIntervals[id][i + 1], intervals.end());
           i += 2;
           assertTrue(mi.next());
           assertEquals(
@@ -158,7 +169,8 @@ public class TestIntervals extends LuceneTestCase {
               intervals.end(),
               mi.endPosition());
         }
-        assertEquals(source + ": wrong number of endpoints in doc " + id, expected[id].length, i);
+        assertEquals(
+            source + ": wrong number of endpoints in doc " + id, expectedIntervals[id].length, i);
         assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.start());
         assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.end());
         if (i > 0) {
@@ -168,7 +180,11 @@ public class TestIntervals extends LuceneTestCase {
           assertNull("Expected null matches iterator on doc " + id, mi);
         }
       } else {
-        assertEquals(0, expected[id].length);
+        // Assume empty matches if the expected array of IDs is shorter
+        // than the number of docs.
+        if (expectedIntervals.length > id) {
+          assertEquals(0, expectedIntervals[id].length);
+        }
         assertNull(mi);
       }
     }
@@ -220,10 +236,10 @@ public class TestIntervals extends LuceneTestCase {
   private void assertMatch(MatchesIterator mi, int start, int end, int startOffset, int endOffset)
       throws IOException {
     assertTrue(mi.next());
-    assertEquals(start, mi.startPosition());
-    assertEquals(end, mi.endPosition());
-    assertEquals(startOffset, mi.startOffset());
-    assertEquals(endOffset, mi.endOffset());
+    assertEquals("Start position", start, mi.startPosition());
+    assertEquals("End position", end, mi.endPosition());
+    assertEquals("Start offset", startOffset, mi.startOffset());
+    assertEquals("End offset", endOffset, mi.endOffset());
   }

   private void assertGaps(IntervalsSource source, int doc, String field, int[] expectedGaps)
@@ -253,6 +269,17 @@ public class TestIntervals extends LuceneTestCase {
         e.getMessage());
   }

+  public void testExtends() throws IOException {
+    IntervalsSource source = Intervals.extend(Intervals.term("fox"), 1, 2);
+    checkIntervals(source, "field2", 1, new int[][] {{}, {}, {}, {}, {}, {}, {2, 5}});
+    MatchesIterator mi = getMatches(source, 6, "field2");
+
+    // LUCENE-10229: we can't report offsets for the "extended" position range because this
+    // information
+    // is not available from term positions index alone. Report the truth (-1 - not available).
+    assertMatch(mi, 2, 5, -1, -1);
+  }
+
   public void testTermQueryIntervals() throws IOException {
     IntervalsSource source = Intervals.term("porridge");
     checkIntervals(
@@ -840,11 +867,11 @@ public class TestIntervals extends LuceneTestCase {
     assertEquals(5, source.minExtent());

     MatchesIterator mi = getMatches(source, 1, "field1");
-    assertMatch(mi, 3, 7, 20, 55);
+    assertMatch(mi, 3, 7, -1, -1);
     MatchesIterator sub = mi.getSubMatches();
     assertNotNull(sub);
     assertMatch(sub, 3, 3, 20, 25);
-    assertMatch(sub, 4, 6, 35, 39);
+    assertMatch(sub, 4, 6, -1, -1);
     assertMatch(sub, 7, 7, 47, 55);

     source = Intervals.extend(Intervals.term("w1"), 5, Integer.MAX_VALUE);