LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource

This commit is contained in:
Alan Woodward 2019-06-07 14:45:56 +01:00
parent 85abdbebf5
commit 67677d995e
8 changed files with 75 additions and 115 deletions

View File

@ -63,6 +63,9 @@ Bug Fixes
* LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances.
(Vamshi Vijay Nakkirtha via Adrien Grand)
* LUCENE-8828: Removes the buggy 'disallow overlaps' boolean from Intervals.unordered(),
and replaces it with a new Intervals.unorderedNoOverlaps() method (Alan Woodward)
Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue;
class DisjunctionIntervalsSource extends IntervalsSource {
final Collection<IntervalsSource> subSources;
final boolean pullUpDisjunctions;
public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
static IntervalsSource create(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
subSources = simplify(subSources);
if (subSources.size() == 1) {
return subSources.iterator().next();
}
return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions);
}
private DisjunctionIntervalsSource(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
this.subSources = simplify(subSources);
this.pullUpDisjunctions = pullUpDisjunctions;
}
private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return subSources;
if (pullUpDisjunctions) {
return subSources;
}
return Collections.singletonList(this);
}
static class DisjunctionIntervalIterator extends IntervalIterator {

View File

@ -138,13 +138,7 @@ public final class Intervals {
* @param subSources the sources to combine
*/
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
if (subSources.size() == 1) {
return subSources.get(0);
}
if (rewrite) {
return new DisjunctionIntervalsSource(subSources);
}
return new NoRewriteDisjunctionIntervalsSource(subSources);
return DisjunctionIntervalsSource.create(subSources, rewrite);
}
/**
@ -227,19 +221,16 @@ public final class Intervals {
* @param subSources an unordered set of {@link IntervalsSource}s
*/
public static IntervalsSource unordered(IntervalsSource... subSources) {
return unordered(true, subSources);
return UnorderedIntervalsSource.build(Arrays.asList(subSources));
}
/**
* Create an unordered {@link IntervalsSource}
* Create an unordered {@link IntervalsSource} allowing no overlaps between subsources
*
* Returns intervals in which all the subsources appear.
*
* @param subSources an unordered set of {@link IntervalsSource}s
* @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
* Returns intervals in which both the subsources appear and do not overlap.
*/
public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) {
return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps);
public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) {
return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a));
}
/**

View File

@ -1,33 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.intervals;
import java.util.Collection;
import java.util.Collections;
class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
super(subSources);
}
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return Collections.singletonList(this);
}
}

View File

@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue;
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) {
static IntervalsSource build(List<IntervalsSource> sources) {
if (sources.size() == 1) {
return sources.get(0);
}
return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps);
return new UnorderedIntervalsSource(flatten(sources));
}
private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) {
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) {
if (s instanceof UnorderedIntervalsSource) {
flattened.addAll(((UnorderedIntervalsSource)s).subSources);
}
else {
@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return flattened;
}
private final boolean allowOverlaps;
private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
super(sources, true);
this.allowOverlaps = allowOverlaps;
}
@Override
protected IntervalIterator combine(List<IntervalIterator> iterators) {
return new UnorderedIntervalIterator(iterators, allowOverlaps);
return new UnorderedIntervalIterator(iterators);
}
@Override
@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps));
return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new);
}
@Override
public int hashCode() {
return Objects.hash(this.subSources, this.allowOverlaps);
return Objects.hash(this.subSources);
}
@Override
public boolean equals(Object other) {
if (other instanceof UnorderedIntervalsSource == false) return false;
UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
return Objects.equals(this.subSources, o.subSources) &&
Objects.equals(this.allowOverlaps, o.allowOverlaps);
return Objects.equals(this.subSources, o.subSources);
}
@Override
public String toString() {
return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") +
return "UNORDERED(" +
subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
}
@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
private final PriorityQueue<IntervalIterator> queue;
private final IntervalIterator[] subIterators;
private final int[] innerPositions;
private final boolean allowOverlaps;
int start = -1, end = -1, firstEnd, queueEnd;
UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
super(subIterators);
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
@Override
@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
};
this.subIterators = new IntervalIterator[subIterators.size()];
this.innerPositions = new int[subIterators.size() * 2];
this.allowOverlaps = allowOverlaps;
for (int i = 0; i < subIterators.size(); i++) {
this.subIterators[i] = subIterators.get(i);
@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
while (this.queue.size() == subIterators.length && queue.top().start() == start) {
IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
return start = end = IntervalIterator.NO_MORE_INTERVALS;
}
}
queue.add(it);
updateRightExtreme(it);
}
@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return start;
IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
return start;
}
}
}
queue.add(it);
updateRightExtreme(it);
}
@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
protected void reset() throws IOException {
queueEnd = start = end = -1;
this.queue.clear();
loop: for (IntervalIterator it : subIterators) {
for (IntervalIterator it : subIterators) {
if (it.nextInterval() == NO_MORE_INTERVALS) {
break;
}
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == NO_MORE_INTERVALS) {
break loop;
}
}
}
queue.add(it);
updateRightExtreme(it);
}
}
private boolean hasOverlaps(IntervalIterator candidate) {
for (IntervalIterator it : queue) {
if (it.start() < candidate.start()) {
if (it.end() >= candidate.start()) {
return true;
}
continue;
}
if (it.start() == candidate.start()) {
return true;
}
if (it.start() <= candidate.end()) {
return true;
}
}
return false;
}
}
}

View File

@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase {
"w2 w1 w3 w2 w4",
"coordinate genome mapping research",
"coordinate genome research",
"greater new york"
"greater new york",
"x x x x x intend x x x message x x x message x x x addressed x x"
};
private void checkHits(Query query, int[] results) throws IOException {
@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase {
checkHits(q, new int[]{3});
}
public void testUnorderedNoOverlaps() throws IOException {
Query q = new IntervalQuery(field,
Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"),
Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"),
Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message"))))))));
checkHits(q, new int[]{ 9 });
q = new IntervalQuery(field, Intervals.unorderedNoOverlaps(
Intervals.term("w2"),
Intervals.or(Intervals.term("w2"), Intervals.term("w3"))));
checkHits(q, new int[]{ 0, 1, 2, 3, 5 });
}
public void testNestedOrInUnorderedMaxGaps() throws IOException {
Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),

View File

@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase {
}
public void testUnorderedDistinct() throws IOException {
checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")),
checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")),
"field1", 3, new int[][]{
{},
{ 0, 3, 3, 6 },
@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3, 3, 6 },
{}
});
checkIntervals(Intervals.unordered(false,
checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")),
"field1", 3, new int[][]{
{},
{ 1, 4, 4, 17 },
{ 1, 4, 2, 7, 4, 17 },
{ 1, 5, 4, 7 },
{},
{ 1, 4, 4, 17 },
{ 1, 4, 2, 7, 4, 17 },
{}
});
checkIntervals(Intervals.unordered(false,
checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")),
"field2", 1, new int[][]{
@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3 },
{}
});
checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.term("porridge"),
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{
{},
{ 1, 4, 4, 7 },
{ 1, 4, 4, 7 },
{},
{ 1, 4, 4, 7 },
{}
});
}
public void testContainedBy() throws IOException {

View File

@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase {
}
public void testUnorderedOverlaps() {
// UNORDERED_NO_OVERLAPS(term) => term
IntervalsSource actual = Intervals.unordered(false, Intervals.term("term"));
assertEquals(Intervals.term("term"), actual);
// UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term)
IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term"));
assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual);
}
public void testDisjunctionSingleton() {
IntervalsSource actual = Intervals.or(Intervals.term("a"));
assertEquals(Intervals.term("a"), actual);
}
public void testDisjunctionRemovesDuplicates() {