mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
LUCENE-10140: Correct minimizing iterator sub-matches (#370)
Some interval iterators will attempt to minimize themselves by moving sub-iterators forward until they are no longer positioned within the current match. This causes problems when we try to pull Matches for these iterators, as their sub-iterators are now out of position. We have previously tried to deal with this by introducing caching iterators that check to see if they have been moved beyond the end of the current interval, but this fails in cases where an interval can contain multiple copies of a particular iterator. This commit adds the ability for minimizing iterators to signal to their children when a prospective match has been found, so that they can cache their positions and offsets. Co-authored-by: Nikolay Khitrin <khitrin@gmail.com>
This commit is contained in:
parent
f67dec1739
commit
ca073c98fa
@ -349,6 +349,9 @@ Bug fixes
|
||||
* LUCENE-10114: Remove unused byte order mark in Lucene90PostingsWriter. This
|
||||
was initially introduced by accident in Lucene 8.4. (Uwe Schindler)
|
||||
|
||||
* LUCENE-10140: Fix cases where minimizing interval iterators could return
|
||||
incorrect matches (Nikolay Khitrin, Alan Woodward)
|
||||
|
||||
Changes in Backwards Compatibility Policy
|
||||
|
||||
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
|
||||
|
@ -47,7 +47,7 @@ class BlockIntervalsSource extends ConjunctionIntervalsSource {
|
||||
}
|
||||
|
||||
private BlockIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(flatten(sources), true);
|
||||
super(flatten(sources));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -25,7 +25,6 @@ import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMatchesIterator {
|
||||
|
||||
private boolean positioned = false;
|
||||
private int[] posAndOffsets = new int[4 * 4];
|
||||
private Query[] matchingQueries = new Query[4];
|
||||
private int count = 0;
|
||||
@ -34,7 +33,7 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
||||
super(in);
|
||||
}
|
||||
|
||||
private void cache() throws IOException {
|
||||
void cache() throws IOException {
|
||||
count = 0;
|
||||
MatchesIterator mi = in.getSubMatches();
|
||||
if (mi == null) {
|
||||
@ -62,32 +61,25 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (positioned == false) {
|
||||
positioned = true;
|
||||
} else {
|
||||
cache();
|
||||
}
|
||||
return in.next();
|
||||
}
|
||||
|
||||
int startOffset(int endPos) throws IOException {
|
||||
if (endPosition() <= endPos) {
|
||||
return in.startOffset();
|
||||
}
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
return posAndOffsets[2];
|
||||
}
|
||||
|
||||
int endOffset(int endPos) throws IOException {
|
||||
if (endPosition() <= endPos) {
|
||||
return in.endOffset();
|
||||
}
|
||||
return posAndOffsets[count * 4 + 3];
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
return posAndOffsets[(count - 1) * 4 + 3];
|
||||
}
|
||||
|
||||
MatchesIterator getSubMatches(int endPos) throws IOException {
|
||||
if (endPosition() <= endPos) {
|
||||
cache();
|
||||
}
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() {
|
||||
// We always return a submatches, even if there's only a single
|
||||
// cached submatch, because this way we can return the correct
|
||||
// positions - the positions of the top-level match may have
|
||||
// moved on due to minimization
|
||||
return new MatchesIterator() {
|
||||
|
||||
int upto = -1;
|
||||
@ -130,6 +122,11 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return matchingQueries[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return ((IntervalMatchesIterator) in).gaps();
|
||||
|
@ -23,21 +23,16 @@ import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.FilterMatchesIterator;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
|
||||
abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
protected final List<IntervalsSource> subSources;
|
||||
protected final boolean isMinimizing;
|
||||
|
||||
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources, boolean isMinimizing) {
|
||||
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources) {
|
||||
assert subSources.size() > 1;
|
||||
this.subSources = subSources;
|
||||
this.isMinimizing = isMinimizing;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -73,9 +68,6 @@ abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
if (mi == null) {
|
||||
return null;
|
||||
}
|
||||
if (isMinimizing) {
|
||||
mi = new CachingMatchesIterator(mi);
|
||||
}
|
||||
subs.add(mi);
|
||||
}
|
||||
IntervalIterator it =
|
||||
@ -89,103 +81,6 @@ abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return null;
|
||||
}
|
||||
return isMinimizing
|
||||
? new MinimizingConjunctionMatchesIterator(it, subs)
|
||||
: new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
|
||||
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
|
||||
final IntervalIterator iterator;
|
||||
final List<IntervalMatchesIterator> subs;
|
||||
boolean cached = true;
|
||||
|
||||
private ConjunctionMatchesIterator(
|
||||
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
|
||||
this.iterator = iterator;
|
||||
this.subs = subs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (cached) {
|
||||
cached = false;
|
||||
return true;
|
||||
}
|
||||
return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return iterator.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return iterator.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
int start = Integer.MAX_VALUE;
|
||||
for (MatchesIterator s : subs) {
|
||||
start = Math.min(start, s.startOffset());
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
int end = -1;
|
||||
for (MatchesIterator s : subs) {
|
||||
end = Math.max(end, s.endOffset());
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> subMatches = new ArrayList<>();
|
||||
for (MatchesIterator mi : subs) {
|
||||
MatchesIterator sub = mi.getSubMatches();
|
||||
if (sub == null) {
|
||||
sub = new SingletonMatchesIterator(mi);
|
||||
}
|
||||
subMatches.add(sub);
|
||||
}
|
||||
return MatchesUtils.disjunction(subMatches);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return iterator.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return iterator.width();
|
||||
}
|
||||
}
|
||||
|
||||
static class SingletonMatchesIterator extends FilterMatchesIterator {
|
||||
|
||||
boolean exhausted = false;
|
||||
|
||||
SingletonMatchesIterator(MatchesIterator in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
if (exhausted) {
|
||||
return false;
|
||||
}
|
||||
return exhausted = true;
|
||||
}
|
||||
return new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
}
|
||||
|
@ -20,23 +20,21 @@ package org.apache.lucene.queries.intervals;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.search.FilterMatchesIterator;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
class ConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
|
||||
final IntervalIterator iterator;
|
||||
private final List<CachingMatchesIterator> subs = new ArrayList<>();
|
||||
private boolean cached = true;
|
||||
final List<? extends IntervalMatchesIterator> subs;
|
||||
boolean cached = true;
|
||||
|
||||
MinimizingConjunctionMatchesIterator(
|
||||
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
|
||||
ConjunctionMatchesIterator(
|
||||
IntervalIterator iterator, List<? extends IntervalMatchesIterator> subs) {
|
||||
this.iterator = iterator;
|
||||
for (MatchesIterator mi : subs) {
|
||||
assert mi instanceof CachingMatchesIterator;
|
||||
this.subs.add((CachingMatchesIterator) mi);
|
||||
}
|
||||
this.subs = subs;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -61,23 +59,39 @@ class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
int start = Integer.MAX_VALUE;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
start = Math.min(start, s.startOffset(endPos));
|
||||
for (MatchesIterator s : subs) {
|
||||
start = Math.min(start, s.startOffset());
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
int end = 0;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
end = Math.max(end, s.endOffset(endPos));
|
||||
int end = -1;
|
||||
for (MatchesIterator s : subs) {
|
||||
end = Math.max(end, s.endOffset());
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> subMatches = new ArrayList<>();
|
||||
for (MatchesIterator mi : subs) {
|
||||
MatchesIterator sub = mi.getSubMatches();
|
||||
if (sub == null) {
|
||||
sub = new SingletonMatchesIterator(mi);
|
||||
}
|
||||
subMatches.add(sub);
|
||||
}
|
||||
return MatchesUtils.disjunction(subMatches);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return iterator.gaps();
|
||||
@ -88,18 +102,20 @@ class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
return iterator.width();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> mis = new ArrayList<>();
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
mis.add(s.getSubMatches(endPos));
|
||||
}
|
||||
return MatchesUtils.disjunction(mis);
|
||||
}
|
||||
static class SingletonMatchesIterator extends FilterMatchesIterator {
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return null;
|
||||
boolean exhausted = false;
|
||||
|
||||
SingletonMatchesIterator(MatchesIterator in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
if (exhausted) {
|
||||
return false;
|
||||
}
|
||||
return exhausted = true;
|
||||
}
|
||||
}
|
||||
}
|
@ -33,7 +33,7 @@ class ContainedByIntervalsSource extends ConjunctionIntervalsSource {
|
||||
private final IntervalsSource big;
|
||||
|
||||
private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
|
||||
super(Arrays.asList(small, big), false);
|
||||
super(Arrays.asList(small, big));
|
||||
this.small = small;
|
||||
this.big = big;
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ class ContainingIntervalsSource extends ConjunctionIntervalsSource {
|
||||
}
|
||||
|
||||
private ContainingIntervalsSource(IntervalsSource big, IntervalsSource small) {
|
||||
super(Arrays.asList(big, small), false);
|
||||
super(Arrays.asList(big, small));
|
||||
this.big = big;
|
||||
this.small = small;
|
||||
}
|
||||
|
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.queries.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
|
||||
/**
 * Base class for conjunction interval sources whose combined iterator is handed a
 * {@link MatchCallback}.
 *
 * <p>When matches are requested, each sub-source's matches iterator is wrapped in a
 * {@code CachingMatchesIterator}, and the callback given to {@link #combine} calls
 * {@code cache()} on every wrapper. When only intervals are requested, the callback is
 * {@link MatchCallback#NO_OP}.
 */
abstract class MinimizingConjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
// The sources being combined; the constructor asserts there is more than one.
protected final List<IntervalsSource> subSources;
|
||||
|
||||
/**
 * @param subSources the sources to combine; stored by reference, not copied
 */
protected MinimizingConjunctionIntervalsSource(List<IntervalsSource> subSources) {
|
||||
assert subSources.size() > 1;
|
||||
this.subSources = subSources;
|
||||
}
|
||||
|
||||
/**
 * Builds the combined iterator over the given sub-iterators.
 *
 * @param iterators one iterator per sub-source, in {@code subSources} order
 * @param onMatch {@link MatchCallback#NO_OP} when called from {@code intervals}, or a callback
 *     that caches every sub-matches iterator when called from {@code matches}.
 *     NOTE(review): implementations are presumably expected to invoke it when a candidate match
 *     is found, before moving sub-iterators further - confirm against the subclasses.
 */
protected abstract IntervalIterator combine(
|
||||
List<IntervalIterator> iterators, MatchCallback onMatch);
|
||||
|
||||
/**
 * Returns the combined iterator over all sub-sources, or {@code null} as soon as any sub-source
 * returns {@code null}. No caching is needed here, so the no-op callback is used.
 */
@Override
|
||||
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
List<IntervalIterator> subIntervals = new ArrayList<>();
|
||||
for (IntervalsSource source : subSources) {
|
||||
IntervalIterator it = source.intervals(field, ctx);
|
||||
if (it == null) {
|
||||
return null;
|
||||
}
|
||||
subIntervals.add(it);
|
||||
}
|
||||
return combine(subIntervals, MatchCallback.NO_OP);
|
||||
}
|
||||
|
||||
/**
 * Returns a matches iterator for {@code doc}, or {@code null} when any sub-source returns
 * {@code null}, when the combined iterator cannot be advanced exactly onto {@code doc}, or when
 * it has no interval there.
 *
 * <p>The combined iterator has already been moved to its first interval when it is passed to
 * {@code ConjunctionMatchesIterator}.
 */
@Override
|
||||
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc)
|
||||
throws IOException {
|
||||
List<CachingMatchesIterator> subs = new ArrayList<>();
|
||||
for (IntervalsSource source : subSources) {
|
||||
IntervalMatchesIterator mi = source.matches(field, ctx, doc);
|
||||
if (mi == null) {
|
||||
return null;
|
||||
}
|
||||
subs.add(new CachingMatchesIterator(mi));
|
||||
}
|
||||
// The same caching wrappers are both iterated (via wrapMatches) and cached by the callback.
IntervalIterator it =
|
||||
combine(
|
||||
subs.stream()
|
||||
.map(m -> IntervalMatches.wrapMatches(m, doc))
|
||||
.collect(Collectors.toList()),
|
||||
cacheIterators(subs));
|
||||
if (it.advance(doc) != doc) {
|
||||
return null;
|
||||
}
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return null;
|
||||
}
|
||||
return new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
|
||||
/**
 * Visits every sub-source through a {@code MUST} sub-visitor whose parent is an
 * {@code IntervalQuery} built from this source.
 */
@Override
|
||||
public void visit(String field, QueryVisitor visitor) {
|
||||
Query parent = new IntervalQuery(field, this);
|
||||
QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, parent);
|
||||
for (IntervalsSource source : subSources) {
|
||||
source.visit(field, v);
|
||||
}
|
||||
}
|
||||
|
||||
/** Hook through which a combined iterator reports that it has found a match. */
interface MatchCallback {
|
||||
|
||||
/** Called when the parent iterator has found a match */
|
||||
void onMatch() throws IOException;
|
||||
|
||||
// Callback that does nothing.
MatchCallback NO_OP = () -> {};
|
||||
}
|
||||
|
||||
/**
 * Returns a callback that calls {@code cache()} on every iterator in {@code its}, in the
 * collection's iteration order, each time it is invoked.
 */
static MatchCallback cacheIterators(Collection<CachingMatchesIterator> its) {
|
||||
return () -> {
|
||||
for (CachingMatchesIterator it : its) {
|
||||
it.cache();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
@ -59,7 +59,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
if (iterators.size() < minShouldMatch) {
|
||||
return null;
|
||||
}
|
||||
return new MinimumShouldMatchIntervalIterator(iterators, minShouldMatch);
|
||||
return new MinimumShouldMatchIntervalIterator(iterators, minShouldMatch, () -> {});
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -77,7 +77,10 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
return null;
|
||||
}
|
||||
MinimumShouldMatchIntervalIterator it =
|
||||
new MinimumShouldMatchIntervalIterator(lookup.keySet(), minShouldMatch);
|
||||
new MinimumShouldMatchIntervalIterator(
|
||||
lookup.keySet(),
|
||||
minShouldMatch,
|
||||
MinimizingConjunctionIntervalsSource.cacheIterators(lookup.values()));
|
||||
if (it.advance(doc) != doc) {
|
||||
return null;
|
||||
}
|
||||
@ -159,11 +162,15 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
private final float matchCost;
|
||||
private final int minShouldMatch;
|
||||
private final Collection<IntervalIterator> currentIterators = new ArrayList<>();
|
||||
private final MinimizingConjunctionIntervalsSource.MatchCallback onMatch;
|
||||
|
||||
private int start, end, queueEnd, slop;
|
||||
private IntervalIterator lead;
|
||||
|
||||
MinimumShouldMatchIntervalIterator(Collection<IntervalIterator> subs, int minShouldMatch) {
|
||||
MinimumShouldMatchIntervalIterator(
|
||||
Collection<IntervalIterator> subs,
|
||||
int minShouldMatch,
|
||||
MinimizingConjunctionIntervalsSource.MatchCallback onMatch) {
|
||||
this.disiQueue = new DisiPriorityQueue(subs.size());
|
||||
float mc = 0;
|
||||
for (IntervalIterator it : subs) {
|
||||
@ -173,6 +180,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
this.approximation = new DisjunctionDISIApproximation(disiQueue);
|
||||
this.matchCost = mc;
|
||||
this.minShouldMatch = minShouldMatch;
|
||||
this.onMatch = onMatch;
|
||||
|
||||
this.proximityQueue =
|
||||
new PriorityQueue<IntervalIterator>(minShouldMatch) {
|
||||
@ -223,6 +231,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
// then, minimize it
|
||||
do {
|
||||
onMatch.onMatch();
|
||||
start = proximityQueue.top().start();
|
||||
end = queueEnd;
|
||||
slop = width();
|
||||
@ -352,10 +361,9 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
int start = Integer.MAX_VALUE;
|
||||
int endPos = endPosition();
|
||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||
CachingMatchesIterator cms = lookup.get(it);
|
||||
start = Math.min(start, cms.startOffset(endPos));
|
||||
start = Math.min(start, cms.startOffset());
|
||||
}
|
||||
return start;
|
||||
}
|
||||
@ -363,10 +371,9 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
int end = 0;
|
||||
int endPos = endPosition();
|
||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||
CachingMatchesIterator cms = lookup.get(it);
|
||||
end = Math.max(end, cms.endOffset(endPos));
|
||||
end = Math.max(end, cms.endOffset());
|
||||
}
|
||||
return end;
|
||||
}
|
||||
@ -384,10 +391,10 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> mis = new ArrayList<>();
|
||||
int endPos = endPosition();
|
||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||
CachingMatchesIterator cms = lookup.get(it);
|
||||
mis.add(cms.getSubMatches(endPos));
|
||||
MatchesIterator mi = cms.getSubMatches();
|
||||
mis.add(mi == null ? cms : mi);
|
||||
}
|
||||
return MatchesUtils.disjunction(mis);
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||
if (sources.size() == 1) {
|
||||
@ -69,12 +69,12 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
}
|
||||
|
||||
private OrderedIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(sources, true);
|
||||
super(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new OrderedIntervalIterator(iterators);
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators, MatchCallback onMatch) {
|
||||
return new OrderedIntervalIterator(iterators, onMatch);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -114,9 +114,11 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
int start = -1, end = -1, i;
|
||||
int slop;
|
||||
final MatchCallback onMatch;
|
||||
|
||||
private OrderedIntervalIterator(List<IntervalIterator> subIntervals) {
|
||||
private OrderedIntervalIterator(List<IntervalIterator> subIntervals, MatchCallback onMatch) {
|
||||
super(subIntervals);
|
||||
this.onMatch = onMatch;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -161,6 +163,7 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
for (IntervalIterator subIterator : subIterators) {
|
||||
slop -= subIterator.width();
|
||||
}
|
||||
onMatch.onMatch();
|
||||
lastStart = subIterators.get(subIterators.size() - 1).start();
|
||||
i = 1;
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
|
@ -29,7 +29,7 @@ class OverlappingIntervalsSource extends ConjunctionIntervalsSource {
|
||||
private final IntervalsSource reference;
|
||||
|
||||
OverlappingIntervalsSource(IntervalsSource source, IntervalsSource reference) {
|
||||
super(Arrays.asList(source, reference), false);
|
||||
super(Arrays.asList(source, reference));
|
||||
this.source = source;
|
||||
this.reference = reference;
|
||||
}
|
||||
|
@ -290,7 +290,7 @@ class RepeatingIntervalsSource extends IntervalsSource {
|
||||
for (MatchesIterator mi : subs) {
|
||||
MatchesIterator sub = mi.getSubMatches();
|
||||
if (sub == null) {
|
||||
sub = new ConjunctionIntervalsSource.SingletonMatchesIterator(mi);
|
||||
sub = new ConjunctionMatchesIterator.SingletonMatchesIterator(mi);
|
||||
}
|
||||
subMatches.add(sub);
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
class UnorderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||
if (sources.size() == 1) {
|
||||
@ -68,12 +68,12 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
}
|
||||
|
||||
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(sources, true);
|
||||
super(sources);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new UnorderedIntervalIterator(iterators);
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators, MatchCallback onMatch) {
|
||||
return new UnorderedIntervalIterator(iterators, onMatch);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -113,10 +113,11 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
private final PriorityQueue<IntervalIterator> queue;
|
||||
private final IntervalIterator[] subIterators;
|
||||
private final MatchCallback onMatch;
|
||||
|
||||
int start = -1, end = -1, slop, queueEnd;
|
||||
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators, MatchCallback onMatch) {
|
||||
super(subIterators);
|
||||
this.queue =
|
||||
new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
||||
@ -126,6 +127,7 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
}
|
||||
};
|
||||
this.subIterators = new IntervalIterator[subIterators.size()];
|
||||
this.onMatch = onMatch;
|
||||
|
||||
for (int i = 0; i < subIterators.size(); i++) {
|
||||
this.subIterators[i] = subIterators.get(i);
|
||||
@ -169,6 +171,7 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
for (IntervalIterator it : subIterators) {
|
||||
slop -= it.width();
|
||||
}
|
||||
onMatch.onMatch();
|
||||
if (queue.top().end() == end) {
|
||||
return start;
|
||||
}
|
||||
|
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.queries.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.MatchesTestBase;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
 * Checks the matches reported by ordered interval queries over two small documents: one query
 * with a wildcard sub-source and one that repeats the same terms several times.
 */
public class TestComplexMatches extends MatchesTestBase {
|
||||
|
||||
// Document 0 is used by testWildcards, document 1 by testRepeatedIterators.
@Override
|
||||
protected String[] getDocuments() {
|
||||
return new String[] {"compare computer science", "a b a b a"};
|
||||
}
|
||||
|
||||
/**
 * Ordered query of the wildcard {@code comp*} followed by the term {@code science}.
 *
 * <p>The expected row is presumably {doc, startPosition, endPosition, startOffset, endOffset}
 * (TODO confirm against MatchesTestBase). Read that way it describes "computer science" in
 * document 0, not the wider "compare computer science".
 */
public void testWildcards() throws IOException {
|
||||
Query q =
|
||||
new IntervalQuery(
|
||||
FIELD_WITH_OFFSETS,
|
||||
Intervals.ordered(
|
||||
Intervals.wildcard(new BytesRef("comp*")), Intervals.term("science")));
|
||||
|
||||
checkMatches(q, FIELD_WITH_OFFSETS, new int[][] {{0, 1, 2, 8, 24}});
|
||||
}
|
||||
|
||||
/**
 * Ordered query {@code a b a b a}, in which the terms {@code a} and {@code b} each occur more
 * than once.
 *
 * <p>Expects no term matches for document 0 and a single group of five term matches for
 * document 1. The TermMatch arguments are presumably (position, startOffset, endOffset) - TODO
 * confirm against MatchesTestBase.
 */
public void testRepeatedIterators() throws IOException {
|
||||
Query q =
|
||||
new IntervalQuery(
|
||||
FIELD_WITH_OFFSETS,
|
||||
Intervals.ordered(
|
||||
Intervals.term("a"),
|
||||
Intervals.term("b"),
|
||||
Intervals.term("a"),
|
||||
Intervals.term("b"),
|
||||
Intervals.term("a")));
|
||||
|
||||
checkTermMatches(
|
||||
q,
|
||||
FIELD_WITH_OFFSETS,
|
||||
new TermMatch[][][] {
|
||||
{},
|
||||
{
|
||||
{
|
||||
new TermMatch(0, 0, 1),
|
||||
new TermMatch(1, 2, 3),
|
||||
new TermMatch(2, 4, 5),
|
||||
new TermMatch(3, 6, 7),
|
||||
new TermMatch(4, 8, 9)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user