mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
LUCENE-10140: Correct minimizing iterator sub-matches (#370)
Some interval iterators will attempt to minimize themselves by moving sub-iterators forward until they are no longer positioned within the current match. This causes problems when we try to pull Matches for these iterators, as their sub-iterators are now out of position. We have previously tried to deal with this by introducing caching iterators that check to see if they have been moved beyond the end of the current interval, but this fails in cases where an interval can contain multiple copies of a particular iterator. This commit adds the ability for minimizing iterators to signal to their children when a prospective match has been found, so that they can cache their positions and offsets. Co-authored-by: Nikolay Khitrin <khitrin@gmail.com>
This commit is contained in:
parent
f67dec1739
commit
ca073c98fa
@ -349,6 +349,9 @@ Bug fixes
|
|||||||
* LUCENE-10114: Remove unused byte order mark in Lucene90PostingsWriter. This
|
* LUCENE-10114: Remove unused byte order mark in Lucene90PostingsWriter. This
|
||||||
was initially introduced by accident in Lucene 8.4. (Uwe Schindler)
|
was initially introduced by accident in Lucene 8.4. (Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-10140: Fix cases where minimizing interval iterators could return
|
||||||
|
incorrect matches (Nikolay Khitrin, Alan Woodward)
|
||||||
|
|
||||||
Changes in Backwards Compatibility Policy
|
Changes in Backwards Compatibility Policy
|
||||||
|
|
||||||
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
|
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
|
||||||
|
@ -47,7 +47,7 @@ class BlockIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private BlockIntervalsSource(List<IntervalsSource> sources) {
|
private BlockIntervalsSource(List<IntervalsSource> sources) {
|
||||||
super(flatten(sources), true);
|
super(flatten(sources));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -25,7 +25,6 @@ import org.apache.lucene.util.ArrayUtil;
|
|||||||
|
|
||||||
class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMatchesIterator {
|
class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMatchesIterator {
|
||||||
|
|
||||||
private boolean positioned = false;
|
|
||||||
private int[] posAndOffsets = new int[4 * 4];
|
private int[] posAndOffsets = new int[4 * 4];
|
||||||
private Query[] matchingQueries = new Query[4];
|
private Query[] matchingQueries = new Query[4];
|
||||||
private int count = 0;
|
private int count = 0;
|
||||||
@ -34,7 +33,7 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
|||||||
super(in);
|
super(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void cache() throws IOException {
|
void cache() throws IOException {
|
||||||
count = 0;
|
count = 0;
|
||||||
MatchesIterator mi = in.getSubMatches();
|
MatchesIterator mi = in.getSubMatches();
|
||||||
if (mi == null) {
|
if (mi == null) {
|
||||||
@ -62,32 +61,25 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
if (positioned == false) {
|
|
||||||
positioned = true;
|
|
||||||
} else {
|
|
||||||
cache();
|
|
||||||
}
|
|
||||||
return in.next();
|
return in.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
int startOffset(int endPos) throws IOException {
|
@Override
|
||||||
if (endPosition() <= endPos) {
|
public int startOffset() throws IOException {
|
||||||
return in.startOffset();
|
|
||||||
}
|
|
||||||
return posAndOffsets[2];
|
return posAndOffsets[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
int endOffset(int endPos) throws IOException {
|
@Override
|
||||||
if (endPosition() <= endPos) {
|
public int endOffset() throws IOException {
|
||||||
return in.endOffset();
|
return posAndOffsets[(count - 1) * 4 + 3];
|
||||||
}
|
|
||||||
return posAndOffsets[count * 4 + 3];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MatchesIterator getSubMatches(int endPos) throws IOException {
|
@Override
|
||||||
if (endPosition() <= endPos) {
|
public MatchesIterator getSubMatches() {
|
||||||
cache();
|
// We always return a submatches, even if there's only a single
|
||||||
}
|
// cached submatch, because this way we can return the correct
|
||||||
|
// positions - the positions of the top-level match may have
|
||||||
|
// moved on due to minimization
|
||||||
return new MatchesIterator() {
|
return new MatchesIterator() {
|
||||||
|
|
||||||
int upto = -1;
|
int upto = -1;
|
||||||
@ -130,6 +122,11 @@ class CachingMatchesIterator extends FilterMatchesIterator implements IntervalMa
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query getQuery() {
|
||||||
|
return matchingQueries[0];
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int gaps() {
|
public int gaps() {
|
||||||
return ((IntervalMatchesIterator) in).gaps();
|
return ((IntervalMatchesIterator) in).gaps();
|
||||||
|
@ -23,21 +23,16 @@ import java.util.List;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.FilterMatchesIterator;
|
|
||||||
import org.apache.lucene.search.MatchesIterator;
|
|
||||||
import org.apache.lucene.search.MatchesUtils;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryVisitor;
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
|
|
||||||
abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
||||||
|
|
||||||
protected final List<IntervalsSource> subSources;
|
protected final List<IntervalsSource> subSources;
|
||||||
protected final boolean isMinimizing;
|
|
||||||
|
|
||||||
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources, boolean isMinimizing) {
|
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources) {
|
||||||
assert subSources.size() > 1;
|
assert subSources.size() > 1;
|
||||||
this.subSources = subSources;
|
this.subSources = subSources;
|
||||||
this.isMinimizing = isMinimizing;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -73,9 +68,6 @@ abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
|||||||
if (mi == null) {
|
if (mi == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (isMinimizing) {
|
|
||||||
mi = new CachingMatchesIterator(mi);
|
|
||||||
}
|
|
||||||
subs.add(mi);
|
subs.add(mi);
|
||||||
}
|
}
|
||||||
IntervalIterator it =
|
IntervalIterator it =
|
||||||
@ -89,103 +81,6 @@ abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
|||||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return isMinimizing
|
return new ConjunctionMatchesIterator(it, subs);
|
||||||
? new MinimizingConjunctionMatchesIterator(it, subs)
|
|
||||||
: new ConjunctionMatchesIterator(it, subs);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {
|
|
||||||
|
|
||||||
final IntervalIterator iterator;
|
|
||||||
final List<IntervalMatchesIterator> subs;
|
|
||||||
boolean cached = true;
|
|
||||||
|
|
||||||
private ConjunctionMatchesIterator(
|
|
||||||
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
|
|
||||||
this.iterator = iterator;
|
|
||||||
this.subs = subs;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean next() throws IOException {
|
|
||||||
if (cached) {
|
|
||||||
cached = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int startPosition() {
|
|
||||||
return iterator.start();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int endPosition() {
|
|
||||||
return iterator.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int startOffset() throws IOException {
|
|
||||||
int start = Integer.MAX_VALUE;
|
|
||||||
for (MatchesIterator s : subs) {
|
|
||||||
start = Math.min(start, s.startOffset());
|
|
||||||
}
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int endOffset() throws IOException {
|
|
||||||
int end = -1;
|
|
||||||
for (MatchesIterator s : subs) {
|
|
||||||
end = Math.max(end, s.endOffset());
|
|
||||||
}
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public MatchesIterator getSubMatches() throws IOException {
|
|
||||||
List<MatchesIterator> subMatches = new ArrayList<>();
|
|
||||||
for (MatchesIterator mi : subs) {
|
|
||||||
MatchesIterator sub = mi.getSubMatches();
|
|
||||||
if (sub == null) {
|
|
||||||
sub = new SingletonMatchesIterator(mi);
|
|
||||||
}
|
|
||||||
subMatches.add(sub);
|
|
||||||
}
|
|
||||||
return MatchesUtils.disjunction(subMatches);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Query getQuery() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int gaps() {
|
|
||||||
return iterator.gaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int width() {
|
|
||||||
return iterator.width();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static class SingletonMatchesIterator extends FilterMatchesIterator {
|
|
||||||
|
|
||||||
boolean exhausted = false;
|
|
||||||
|
|
||||||
SingletonMatchesIterator(MatchesIterator in) {
|
|
||||||
super(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean next() {
|
|
||||||
if (exhausted) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return exhausted = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,23 +20,21 @@ package org.apache.lucene.queries.intervals;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.lucene.search.FilterMatchesIterator;
|
||||||
import org.apache.lucene.search.MatchesIterator;
|
import org.apache.lucene.search.MatchesIterator;
|
||||||
import org.apache.lucene.search.MatchesUtils;
|
import org.apache.lucene.search.MatchesUtils;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
class ConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||||
|
|
||||||
final IntervalIterator iterator;
|
final IntervalIterator iterator;
|
||||||
private final List<CachingMatchesIterator> subs = new ArrayList<>();
|
final List<? extends IntervalMatchesIterator> subs;
|
||||||
private boolean cached = true;
|
boolean cached = true;
|
||||||
|
|
||||||
MinimizingConjunctionMatchesIterator(
|
ConjunctionMatchesIterator(
|
||||||
IntervalIterator iterator, List<IntervalMatchesIterator> subs) {
|
IntervalIterator iterator, List<? extends IntervalMatchesIterator> subs) {
|
||||||
this.iterator = iterator;
|
this.iterator = iterator;
|
||||||
for (MatchesIterator mi : subs) {
|
this.subs = subs;
|
||||||
assert mi instanceof CachingMatchesIterator;
|
|
||||||
this.subs.add((CachingMatchesIterator) mi);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -61,23 +59,39 @@ class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
|||||||
@Override
|
@Override
|
||||||
public int startOffset() throws IOException {
|
public int startOffset() throws IOException {
|
||||||
int start = Integer.MAX_VALUE;
|
int start = Integer.MAX_VALUE;
|
||||||
int endPos = endPosition();
|
for (MatchesIterator s : subs) {
|
||||||
for (CachingMatchesIterator s : subs) {
|
start = Math.min(start, s.startOffset());
|
||||||
start = Math.min(start, s.startOffset(endPos));
|
|
||||||
}
|
}
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int endOffset() throws IOException {
|
public int endOffset() throws IOException {
|
||||||
int end = 0;
|
int end = -1;
|
||||||
int endPos = endPosition();
|
for (MatchesIterator s : subs) {
|
||||||
for (CachingMatchesIterator s : subs) {
|
end = Math.max(end, s.endOffset());
|
||||||
end = Math.max(end, s.endOffset(endPos));
|
|
||||||
}
|
}
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MatchesIterator getSubMatches() throws IOException {
|
||||||
|
List<MatchesIterator> subMatches = new ArrayList<>();
|
||||||
|
for (MatchesIterator mi : subs) {
|
||||||
|
MatchesIterator sub = mi.getSubMatches();
|
||||||
|
if (sub == null) {
|
||||||
|
sub = new SingletonMatchesIterator(mi);
|
||||||
|
}
|
||||||
|
subMatches.add(sub);
|
||||||
|
}
|
||||||
|
return MatchesUtils.disjunction(subMatches);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query getQuery() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int gaps() {
|
public int gaps() {
|
||||||
return iterator.gaps();
|
return iterator.gaps();
|
||||||
@ -88,18 +102,20 @@ class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
|||||||
return iterator.width();
|
return iterator.width();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
static class SingletonMatchesIterator extends FilterMatchesIterator {
|
||||||
public MatchesIterator getSubMatches() throws IOException {
|
|
||||||
List<MatchesIterator> mis = new ArrayList<>();
|
|
||||||
int endPos = endPosition();
|
|
||||||
for (CachingMatchesIterator s : subs) {
|
|
||||||
mis.add(s.getSubMatches(endPos));
|
|
||||||
}
|
|
||||||
return MatchesUtils.disjunction(mis);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
boolean exhausted = false;
|
||||||
public Query getQuery() {
|
|
||||||
return null;
|
SingletonMatchesIterator(MatchesIterator in) {
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean next() {
|
||||||
|
if (exhausted) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return exhausted = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -33,7 +33,7 @@ class ContainedByIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
private final IntervalsSource big;
|
private final IntervalsSource big;
|
||||||
|
|
||||||
private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
|
private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
|
||||||
super(Arrays.asList(small, big), false);
|
super(Arrays.asList(small, big));
|
||||||
this.small = small;
|
this.small = small;
|
||||||
this.big = big;
|
this.big = big;
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ class ContainingIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private ContainingIntervalsSource(IntervalsSource big, IntervalsSource small) {
|
private ContainingIntervalsSource(IntervalsSource big, IntervalsSource small) {
|
||||||
super(Arrays.asList(big, small), false);
|
super(Arrays.asList(big, small));
|
||||||
this.big = big;
|
this.big = big;
|
||||||
this.small = small;
|
this.small = small;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,105 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.queries.intervals;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
|
|
||||||
|
abstract class MinimizingConjunctionIntervalsSource extends IntervalsSource {
|
||||||
|
|
||||||
|
protected final List<IntervalsSource> subSources;
|
||||||
|
|
||||||
|
protected MinimizingConjunctionIntervalsSource(List<IntervalsSource> subSources) {
|
||||||
|
assert subSources.size() > 1;
|
||||||
|
this.subSources = subSources;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract IntervalIterator combine(
|
||||||
|
List<IntervalIterator> iterators, MatchCallback onMatch);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||||
|
List<IntervalIterator> subIntervals = new ArrayList<>();
|
||||||
|
for (IntervalsSource source : subSources) {
|
||||||
|
IntervalIterator it = source.intervals(field, ctx);
|
||||||
|
if (it == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
subIntervals.add(it);
|
||||||
|
}
|
||||||
|
return combine(subIntervals, MatchCallback.NO_OP);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc)
|
||||||
|
throws IOException {
|
||||||
|
List<CachingMatchesIterator> subs = new ArrayList<>();
|
||||||
|
for (IntervalsSource source : subSources) {
|
||||||
|
IntervalMatchesIterator mi = source.matches(field, ctx, doc);
|
||||||
|
if (mi == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
subs.add(new CachingMatchesIterator(mi));
|
||||||
|
}
|
||||||
|
IntervalIterator it =
|
||||||
|
combine(
|
||||||
|
subs.stream()
|
||||||
|
.map(m -> IntervalMatches.wrapMatches(m, doc))
|
||||||
|
.collect(Collectors.toList()),
|
||||||
|
cacheIterators(subs));
|
||||||
|
if (it.advance(doc) != doc) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new ConjunctionMatchesIterator(it, subs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(String field, QueryVisitor visitor) {
|
||||||
|
Query parent = new IntervalQuery(field, this);
|
||||||
|
QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, parent);
|
||||||
|
for (IntervalsSource source : subSources) {
|
||||||
|
source.visit(field, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface MatchCallback {
|
||||||
|
|
||||||
|
/** Called when the parent iterator has found a match */
|
||||||
|
void onMatch() throws IOException;
|
||||||
|
|
||||||
|
MatchCallback NO_OP = () -> {};
|
||||||
|
}
|
||||||
|
|
||||||
|
static MatchCallback cacheIterators(Collection<CachingMatchesIterator> its) {
|
||||||
|
return () -> {
|
||||||
|
for (CachingMatchesIterator it : its) {
|
||||||
|
it.cache();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
@ -59,7 +59,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
if (iterators.size() < minShouldMatch) {
|
if (iterators.size() < minShouldMatch) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return new MinimumShouldMatchIntervalIterator(iterators, minShouldMatch);
|
return new MinimumShouldMatchIntervalIterator(iterators, minShouldMatch, () -> {});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -77,7 +77,10 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
MinimumShouldMatchIntervalIterator it =
|
MinimumShouldMatchIntervalIterator it =
|
||||||
new MinimumShouldMatchIntervalIterator(lookup.keySet(), minShouldMatch);
|
new MinimumShouldMatchIntervalIterator(
|
||||||
|
lookup.keySet(),
|
||||||
|
minShouldMatch,
|
||||||
|
MinimizingConjunctionIntervalsSource.cacheIterators(lookup.values()));
|
||||||
if (it.advance(doc) != doc) {
|
if (it.advance(doc) != doc) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -159,11 +162,15 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
private final float matchCost;
|
private final float matchCost;
|
||||||
private final int minShouldMatch;
|
private final int minShouldMatch;
|
||||||
private final Collection<IntervalIterator> currentIterators = new ArrayList<>();
|
private final Collection<IntervalIterator> currentIterators = new ArrayList<>();
|
||||||
|
private final MinimizingConjunctionIntervalsSource.MatchCallback onMatch;
|
||||||
|
|
||||||
private int start, end, queueEnd, slop;
|
private int start, end, queueEnd, slop;
|
||||||
private IntervalIterator lead;
|
private IntervalIterator lead;
|
||||||
|
|
||||||
MinimumShouldMatchIntervalIterator(Collection<IntervalIterator> subs, int minShouldMatch) {
|
MinimumShouldMatchIntervalIterator(
|
||||||
|
Collection<IntervalIterator> subs,
|
||||||
|
int minShouldMatch,
|
||||||
|
MinimizingConjunctionIntervalsSource.MatchCallback onMatch) {
|
||||||
this.disiQueue = new DisiPriorityQueue(subs.size());
|
this.disiQueue = new DisiPriorityQueue(subs.size());
|
||||||
float mc = 0;
|
float mc = 0;
|
||||||
for (IntervalIterator it : subs) {
|
for (IntervalIterator it : subs) {
|
||||||
@ -173,6 +180,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
this.approximation = new DisjunctionDISIApproximation(disiQueue);
|
this.approximation = new DisjunctionDISIApproximation(disiQueue);
|
||||||
this.matchCost = mc;
|
this.matchCost = mc;
|
||||||
this.minShouldMatch = minShouldMatch;
|
this.minShouldMatch = minShouldMatch;
|
||||||
|
this.onMatch = onMatch;
|
||||||
|
|
||||||
this.proximityQueue =
|
this.proximityQueue =
|
||||||
new PriorityQueue<IntervalIterator>(minShouldMatch) {
|
new PriorityQueue<IntervalIterator>(minShouldMatch) {
|
||||||
@ -223,6 +231,7 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||||
// then, minimize it
|
// then, minimize it
|
||||||
do {
|
do {
|
||||||
|
onMatch.onMatch();
|
||||||
start = proximityQueue.top().start();
|
start = proximityQueue.top().start();
|
||||||
end = queueEnd;
|
end = queueEnd;
|
||||||
slop = width();
|
slop = width();
|
||||||
@ -352,10 +361,9 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
@Override
|
@Override
|
||||||
public int startOffset() throws IOException {
|
public int startOffset() throws IOException {
|
||||||
int start = Integer.MAX_VALUE;
|
int start = Integer.MAX_VALUE;
|
||||||
int endPos = endPosition();
|
|
||||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||||
CachingMatchesIterator cms = lookup.get(it);
|
CachingMatchesIterator cms = lookup.get(it);
|
||||||
start = Math.min(start, cms.startOffset(endPos));
|
start = Math.min(start, cms.startOffset());
|
||||||
}
|
}
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
@ -363,10 +371,9 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
@Override
|
@Override
|
||||||
public int endOffset() throws IOException {
|
public int endOffset() throws IOException {
|
||||||
int end = 0;
|
int end = 0;
|
||||||
int endPos = endPosition();
|
|
||||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||||
CachingMatchesIterator cms = lookup.get(it);
|
CachingMatchesIterator cms = lookup.get(it);
|
||||||
end = Math.max(end, cms.endOffset(endPos));
|
end = Math.max(end, cms.endOffset());
|
||||||
}
|
}
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
@ -384,10 +391,10 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
|||||||
@Override
|
@Override
|
||||||
public MatchesIterator getSubMatches() throws IOException {
|
public MatchesIterator getSubMatches() throws IOException {
|
||||||
List<MatchesIterator> mis = new ArrayList<>();
|
List<MatchesIterator> mis = new ArrayList<>();
|
||||||
int endPos = endPosition();
|
|
||||||
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
for (IntervalIterator it : iterator.getCurrentIterators()) {
|
||||||
CachingMatchesIterator cms = lookup.get(it);
|
CachingMatchesIterator cms = lookup.get(it);
|
||||||
mis.add(cms.getSubMatches(endPos));
|
MatchesIterator mi = cms.getSubMatches();
|
||||||
|
mis.add(mi == null ? cms : mi);
|
||||||
}
|
}
|
||||||
return MatchesUtils.disjunction(mis);
|
return MatchesUtils.disjunction(mis);
|
||||||
}
|
}
|
||||||
|
@ -24,7 +24,7 @@ import java.util.List;
|
|||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||||
|
|
||||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||||
if (sources.size() == 1) {
|
if (sources.size() == 1) {
|
||||||
@ -69,12 +69,12 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private OrderedIntervalsSource(List<IntervalsSource> sources) {
|
private OrderedIntervalsSource(List<IntervalsSource> sources) {
|
||||||
super(sources, true);
|
super(sources);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
protected IntervalIterator combine(List<IntervalIterator> iterators, MatchCallback onMatch) {
|
||||||
return new OrderedIntervalIterator(iterators);
|
return new OrderedIntervalIterator(iterators, onMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -114,9 +114,11 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
|
|
||||||
int start = -1, end = -1, i;
|
int start = -1, end = -1, i;
|
||||||
int slop;
|
int slop;
|
||||||
|
final MatchCallback onMatch;
|
||||||
|
|
||||||
private OrderedIntervalIterator(List<IntervalIterator> subIntervals) {
|
private OrderedIntervalIterator(List<IntervalIterator> subIntervals, MatchCallback onMatch) {
|
||||||
super(subIntervals);
|
super(subIntervals);
|
||||||
|
this.onMatch = onMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -161,6 +163,7 @@ class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
for (IntervalIterator subIterator : subIterators) {
|
for (IntervalIterator subIterator : subIterators) {
|
||||||
slop -= subIterator.width();
|
slop -= subIterator.width();
|
||||||
}
|
}
|
||||||
|
onMatch.onMatch();
|
||||||
lastStart = subIterators.get(subIterators.size() - 1).start();
|
lastStart = subIterators.get(subIterators.size() - 1).start();
|
||||||
i = 1;
|
i = 1;
|
||||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||||
|
@ -29,7 +29,7 @@ class OverlappingIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
private final IntervalsSource reference;
|
private final IntervalsSource reference;
|
||||||
|
|
||||||
OverlappingIntervalsSource(IntervalsSource source, IntervalsSource reference) {
|
OverlappingIntervalsSource(IntervalsSource source, IntervalsSource reference) {
|
||||||
super(Arrays.asList(source, reference), false);
|
super(Arrays.asList(source, reference));
|
||||||
this.source = source;
|
this.source = source;
|
||||||
this.reference = reference;
|
this.reference = reference;
|
||||||
}
|
}
|
||||||
|
@ -290,7 +290,7 @@ class RepeatingIntervalsSource extends IntervalsSource {
|
|||||||
for (MatchesIterator mi : subs) {
|
for (MatchesIterator mi : subs) {
|
||||||
MatchesIterator sub = mi.getSubMatches();
|
MatchesIterator sub = mi.getSubMatches();
|
||||||
if (sub == null) {
|
if (sub == null) {
|
||||||
sub = new ConjunctionIntervalsSource.SingletonMatchesIterator(mi);
|
sub = new ConjunctionMatchesIterator.SingletonMatchesIterator(mi);
|
||||||
}
|
}
|
||||||
subMatches.add(sub);
|
subMatches.add(sub);
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ import java.util.Objects;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
class UnorderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||||
|
|
||||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||||
if (sources.size() == 1) {
|
if (sources.size() == 1) {
|
||||||
@ -68,12 +68,12 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
|
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
|
||||||
super(sources, true);
|
super(sources);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
protected IntervalIterator combine(List<IntervalIterator> iterators, MatchCallback onMatch) {
|
||||||
return new UnorderedIntervalIterator(iterators);
|
return new UnorderedIntervalIterator(iterators, onMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -113,10 +113,11 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
|
|
||||||
private final PriorityQueue<IntervalIterator> queue;
|
private final PriorityQueue<IntervalIterator> queue;
|
||||||
private final IntervalIterator[] subIterators;
|
private final IntervalIterator[] subIterators;
|
||||||
|
private final MatchCallback onMatch;
|
||||||
|
|
||||||
int start = -1, end = -1, slop, queueEnd;
|
int start = -1, end = -1, slop, queueEnd;
|
||||||
|
|
||||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
|
UnorderedIntervalIterator(List<IntervalIterator> subIterators, MatchCallback onMatch) {
|
||||||
super(subIterators);
|
super(subIterators);
|
||||||
this.queue =
|
this.queue =
|
||||||
new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
||||||
@ -126,6 +127,7 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
this.subIterators = new IntervalIterator[subIterators.size()];
|
this.subIterators = new IntervalIterator[subIterators.size()];
|
||||||
|
this.onMatch = onMatch;
|
||||||
|
|
||||||
for (int i = 0; i < subIterators.size(); i++) {
|
for (int i = 0; i < subIterators.size(); i++) {
|
||||||
this.subIterators[i] = subIterators.get(i);
|
this.subIterators[i] = subIterators.get(i);
|
||||||
@ -169,6 +171,7 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||||||
for (IntervalIterator it : subIterators) {
|
for (IntervalIterator it : subIterators) {
|
||||||
slop -= it.width();
|
slop -= it.width();
|
||||||
}
|
}
|
||||||
|
onMatch.onMatch();
|
||||||
if (queue.top().end() == end) {
|
if (queue.top().end() == end) {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.queries.intervals;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.search.MatchesTestBase;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
public class TestComplexMatches extends MatchesTestBase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String[] getDocuments() {
|
||||||
|
return new String[] {"compare computer science", "a b a b a"};
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testWildcards() throws IOException {
|
||||||
|
Query q =
|
||||||
|
new IntervalQuery(
|
||||||
|
FIELD_WITH_OFFSETS,
|
||||||
|
Intervals.ordered(
|
||||||
|
Intervals.wildcard(new BytesRef("comp*")), Intervals.term("science")));
|
||||||
|
|
||||||
|
checkMatches(q, FIELD_WITH_OFFSETS, new int[][] {{0, 1, 2, 8, 24}});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRepeatedIterators() throws IOException {
|
||||||
|
Query q =
|
||||||
|
new IntervalQuery(
|
||||||
|
FIELD_WITH_OFFSETS,
|
||||||
|
Intervals.ordered(
|
||||||
|
Intervals.term("a"),
|
||||||
|
Intervals.term("b"),
|
||||||
|
Intervals.term("a"),
|
||||||
|
Intervals.term("b"),
|
||||||
|
Intervals.term("a")));
|
||||||
|
|
||||||
|
checkTermMatches(
|
||||||
|
q,
|
||||||
|
FIELD_WITH_OFFSETS,
|
||||||
|
new TermMatch[][][] {
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
{
|
||||||
|
new TermMatch(0, 0, 1),
|
||||||
|
new TermMatch(1, 2, 3),
|
||||||
|
new TermMatch(2, 4, 5),
|
||||||
|
new TermMatch(3, 6, 7),
|
||||||
|
new TermMatch(4, 8, 9)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user