mirror of https://github.com/apache/lucene.git
LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource
This commit is contained in:
parent
85abdbebf5
commit
67677d995e
|
@ -63,6 +63,9 @@ Bug Fixes
|
|||
* LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances.
|
||||
(Vamshi Vijay Nakkirtha via Adrien Grand)
|
||||
|
||||
* LUCENE-8828: Removes the buggy 'disallow overlaps' boolean from Intervals.unordered(),
|
||||
and replaces it with a new Intervals.unorderedNoOverlaps() method (Alan Woodward)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
class DisjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
final Collection<IntervalsSource> subSources;
|
||||
final boolean pullUpDisjunctions;
|
||||
|
||||
public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
|
||||
static IntervalsSource create(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
|
||||
subSources = simplify(subSources);
|
||||
if (subSources.size() == 1) {
|
||||
return subSources.iterator().next();
|
||||
}
|
||||
return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions);
|
||||
}
|
||||
|
||||
private DisjunctionIntervalsSource(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
|
||||
this.subSources = simplify(subSources);
|
||||
this.pullUpDisjunctions = pullUpDisjunctions;
|
||||
}
|
||||
|
||||
private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
|
||||
|
@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
|
|||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return subSources;
|
||||
if (pullUpDisjunctions) {
|
||||
return subSources;
|
||||
}
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
|
||||
static class DisjunctionIntervalIterator extends IntervalIterator {
|
||||
|
|
|
@ -138,13 +138,7 @@ public final class Intervals {
|
|||
* @param subSources the sources to combine
|
||||
*/
|
||||
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
|
||||
if (subSources.size() == 1) {
|
||||
return subSources.get(0);
|
||||
}
|
||||
if (rewrite) {
|
||||
return new DisjunctionIntervalsSource(subSources);
|
||||
}
|
||||
return new NoRewriteDisjunctionIntervalsSource(subSources);
|
||||
return DisjunctionIntervalsSource.create(subSources, rewrite);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -227,19 +221,16 @@ public final class Intervals {
|
|||
* @param subSources an unordered set of {@link IntervalsSource}s
|
||||
*/
|
||||
public static IntervalsSource unordered(IntervalsSource... subSources) {
|
||||
return unordered(true, subSources);
|
||||
return UnorderedIntervalsSource.build(Arrays.asList(subSources));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an unordered {@link IntervalsSource}
|
||||
* Create an unordered {@link IntervalsSource} allowing no overlaps between subsources
|
||||
*
|
||||
* Returns intervals in which all the subsources appear.
|
||||
*
|
||||
* @param subSources an unordered set of {@link IntervalsSource}s
|
||||
* @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
|
||||
* Returns intervals in which both the subsources appear and do not overlap.
|
||||
*/
|
||||
public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) {
|
||||
return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps);
|
||||
public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) {
|
||||
return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
|
||||
|
||||
public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
|
||||
super(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
}
|
|
@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
|
||||
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||
if (sources.size() == 1) {
|
||||
return sources.get(0);
|
||||
}
|
||||
return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps);
|
||||
return new UnorderedIntervalsSource(flatten(sources));
|
||||
}
|
||||
|
||||
private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
|
||||
List<IntervalsSource> flattened = new ArrayList<>();
|
||||
for (IntervalsSource s : sources) {
|
||||
if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) {
|
||||
if (s instanceof UnorderedIntervalsSource) {
|
||||
flattened.addAll(((UnorderedIntervalsSource)s).subSources);
|
||||
}
|
||||
else {
|
||||
|
@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
return flattened;
|
||||
}
|
||||
|
||||
private final boolean allowOverlaps;
|
||||
|
||||
private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(sources, true);
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new UnorderedIntervalIterator(iterators, allowOverlaps);
|
||||
return new UnorderedIntervalIterator(iterators);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps));
|
||||
return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(this.subSources, this.allowOverlaps);
|
||||
return Objects.hash(this.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof UnorderedIntervalsSource == false) return false;
|
||||
UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, o.subSources) &&
|
||||
Objects.equals(this.allowOverlaps, o.allowOverlaps);
|
||||
return Objects.equals(this.subSources, o.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") +
|
||||
return "UNORDERED(" +
|
||||
subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
|
||||
}
|
||||
|
||||
|
@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
private final PriorityQueue<IntervalIterator> queue;
|
||||
private final IntervalIterator[] subIterators;
|
||||
private final int[] innerPositions;
|
||||
private final boolean allowOverlaps;
|
||||
|
||||
int start = -1, end = -1, firstEnd, queueEnd;
|
||||
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
|
||||
super(subIterators);
|
||||
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
||||
@Override
|
||||
|
@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
};
|
||||
this.subIterators = new IntervalIterator[subIterators.size()];
|
||||
this.innerPositions = new int[subIterators.size() * 2];
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
|
||||
for (int i = 0; i < subIterators.size(); i++) {
|
||||
this.subIterators[i] = subIterators.get(i);
|
||||
|
@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
while (this.queue.size() == subIterators.length && queue.top().start() == start) {
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
|
@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
return start;
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
|
@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
|||
protected void reset() throws IOException {
|
||||
queueEnd = start = end = -1;
|
||||
this.queue.clear();
|
||||
loop: for (IntervalIterator it : subIterators) {
|
||||
for (IntervalIterator it : subIterators) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break;
|
||||
}
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasOverlaps(IntervalIterator candidate) {
|
||||
for (IntervalIterator it : queue) {
|
||||
if (it.start() < candidate.start()) {
|
||||
if (it.end() >= candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (it.start() == candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
if (it.start() <= candidate.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase {
|
|||
"w2 w1 w3 w2 w4",
|
||||
"coordinate genome mapping research",
|
||||
"coordinate genome research",
|
||||
"greater new york"
|
||||
"greater new york",
|
||||
"x x x x x intend x x x message x x x message x x x addressed x x"
|
||||
};
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
|
@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase {
|
|||
checkHits(q, new int[]{3});
|
||||
}
|
||||
|
||||
public void testUnorderedNoOverlaps() throws IOException {
|
||||
Query q = new IntervalQuery(field,
|
||||
Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"),
|
||||
Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"),
|
||||
Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message"))))))));
|
||||
checkHits(q, new int[]{ 9 });
|
||||
|
||||
q = new IntervalQuery(field, Intervals.unorderedNoOverlaps(
|
||||
Intervals.term("w2"),
|
||||
Intervals.or(Intervals.term("w2"), Intervals.term("w3"))));
|
||||
checkHits(q, new int[]{ 0, 1, 2, 3, 5 });
|
||||
}
|
||||
|
||||
public void testNestedOrInUnorderedMaxGaps() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
|
||||
Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),
|
||||
|
|
|
@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testUnorderedDistinct() throws IOException {
|
||||
checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")),
|
||||
checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")),
|
||||
"field1", 3, new int[][]{
|
||||
{},
|
||||
{ 0, 3, 3, 6 },
|
||||
|
@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase {
|
|||
{ 0, 3, 3, 6 },
|
||||
{}
|
||||
});
|
||||
checkIntervals(Intervals.unordered(false,
|
||||
checkIntervals(Intervals.unorderedNoOverlaps(
|
||||
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
|
||||
Intervals.term("porridge")),
|
||||
"field1", 3, new int[][]{
|
||||
{},
|
||||
{ 1, 4, 4, 17 },
|
||||
{ 1, 4, 2, 7, 4, 17 },
|
||||
{ 1, 5, 4, 7 },
|
||||
{},
|
||||
{ 1, 4, 4, 17 },
|
||||
{ 1, 4, 2, 7, 4, 17 },
|
||||
{}
|
||||
});
|
||||
checkIntervals(Intervals.unordered(false,
|
||||
checkIntervals(Intervals.unorderedNoOverlaps(
|
||||
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
|
||||
Intervals.term("porridge")),
|
||||
"field2", 1, new int[][]{
|
||||
|
@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase {
|
|||
{ 0, 3 },
|
||||
{}
|
||||
});
|
||||
checkIntervals(Intervals.unorderedNoOverlaps(
|
||||
Intervals.term("porridge"),
|
||||
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{
|
||||
{},
|
||||
{ 1, 4, 4, 7 },
|
||||
{ 1, 4, 4, 7 },
|
||||
{},
|
||||
{ 1, 4, 4, 7 },
|
||||
{}
|
||||
});
|
||||
}
|
||||
|
||||
public void testContainedBy() throws IOException {
|
||||
|
|
|
@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testUnorderedOverlaps() {
|
||||
// UNORDERED_NO_OVERLAPS(term) => term
|
||||
IntervalsSource actual = Intervals.unordered(false, Intervals.term("term"));
|
||||
assertEquals(Intervals.term("term"), actual);
|
||||
// UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term)
|
||||
IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term"));
|
||||
assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual);
|
||||
}
|
||||
|
||||
public void testDisjunctionSingleton() {
|
||||
IntervalsSource actual = Intervals.or(Intervals.term("a"));
|
||||
assertEquals(Intervals.term("a"), actual);
|
||||
}
|
||||
|
||||
public void testDisjunctionRemovesDuplicates() {
|
||||
|
|
Loading…
Reference in New Issue