LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource

This commit is contained in:
Alan Woodward 2019-06-07 14:45:56 +01:00
parent 85abdbebf5
commit 67677d995e
8 changed files with 75 additions and 115 deletions

View File

@ -63,6 +63,9 @@ Bug Fixes
* LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances. * LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances.
(Vamshi Vijay Nakkirtha via Adrien Grand) (Vamshi Vijay Nakkirtha via Adrien Grand)
* LUCENE-8828: Removes the buggy 'allowOverlaps' boolean parameter from Intervals.unordered(),
and replaces it with a new two-argument Intervals.unorderedNoOverlaps() method. (Alan Woodward)
Improvements Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue;
class DisjunctionIntervalsSource extends IntervalsSource { class DisjunctionIntervalsSource extends IntervalsSource {
final Collection<IntervalsSource> subSources; final Collection<IntervalsSource> subSources;
final boolean pullUpDisjunctions;
public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) { static IntervalsSource create(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
subSources = simplify(subSources);
if (subSources.size() == 1) {
return subSources.iterator().next();
}
return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions);
}
private DisjunctionIntervalsSource(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
this.subSources = simplify(subSources); this.subSources = simplify(subSources);
this.pullUpDisjunctions = pullUpDisjunctions;
} }
private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) { private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
@Override @Override
public Collection<IntervalsSource> pullUpDisjunctions() { public Collection<IntervalsSource> pullUpDisjunctions() {
return subSources; if (pullUpDisjunctions) {
return subSources;
}
return Collections.singletonList(this);
} }
static class DisjunctionIntervalIterator extends IntervalIterator { static class DisjunctionIntervalIterator extends IntervalIterator {

View File

@ -138,13 +138,7 @@ public final class Intervals {
* @param subSources the sources to combine * @param subSources the sources to combine
*/ */
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) { public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
if (subSources.size() == 1) { return DisjunctionIntervalsSource.create(subSources, rewrite);
return subSources.get(0);
}
if (rewrite) {
return new DisjunctionIntervalsSource(subSources);
}
return new NoRewriteDisjunctionIntervalsSource(subSources);
} }
/** /**
@ -227,19 +221,16 @@ public final class Intervals {
* @param subSources an unordered set of {@link IntervalsSource}s * @param subSources an unordered set of {@link IntervalsSource}s
*/ */
public static IntervalsSource unordered(IntervalsSource... subSources) { public static IntervalsSource unordered(IntervalsSource... subSources) {
return unordered(true, subSources); return UnorderedIntervalsSource.build(Arrays.asList(subSources));
} }
/** /**
* Create an unordered {@link IntervalsSource} * Create an unordered {@link IntervalsSource} allowing no overlaps between subsources
* *
* Returns intervals in which all the subsources appear. * Returns intervals in which both the subsources appear and do not overlap.
*
* @param subSources an unordered set of {@link IntervalsSource}s
* @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
*/ */
public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) { public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) {
return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps); return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a));
} }
/** /**

View File

@ -1,33 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.intervals;
import java.util.Collection;
import java.util.Collections;
class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
super(subSources);
}
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return Collections.singletonList(this);
}
}

View File

@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue;
class UnorderedIntervalsSource extends ConjunctionIntervalsSource { class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) { static IntervalsSource build(List<IntervalsSource> sources) {
if (sources.size() == 1) { if (sources.size() == 1) {
return sources.get(0); return sources.get(0);
} }
return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps); return new UnorderedIntervalsSource(flatten(sources));
} }
private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) { private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>(); List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) { for (IntervalsSource s : sources) {
if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) { if (s instanceof UnorderedIntervalsSource) {
flattened.addAll(((UnorderedIntervalsSource)s).subSources); flattened.addAll(((UnorderedIntervalsSource)s).subSources);
} }
else { else {
@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return flattened; return flattened;
} }
private final boolean allowOverlaps; private UnorderedIntervalsSource(List<IntervalsSource> sources) {
private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
super(sources, true); super(sources, true);
this.allowOverlaps = allowOverlaps;
} }
@Override @Override
protected IntervalIterator combine(List<IntervalIterator> iterators) { protected IntervalIterator combine(List<IntervalIterator> iterators) {
return new UnorderedIntervalIterator(iterators, allowOverlaps); return new UnorderedIntervalIterator(iterators);
} }
@Override @Override
@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
@Override @Override
public Collection<IntervalsSource> pullUpDisjunctions() { public Collection<IntervalsSource> pullUpDisjunctions() {
return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps)); return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(this.subSources, this.allowOverlaps); return Objects.hash(this.subSources);
} }
@Override @Override
public boolean equals(Object other) { public boolean equals(Object other) {
if (other instanceof UnorderedIntervalsSource == false) return false; if (other instanceof UnorderedIntervalsSource == false) return false;
UnorderedIntervalsSource o = (UnorderedIntervalsSource) other; UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
return Objects.equals(this.subSources, o.subSources) && return Objects.equals(this.subSources, o.subSources);
Objects.equals(this.allowOverlaps, o.allowOverlaps);
} }
@Override @Override
public String toString() { public String toString() {
return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") + return "UNORDERED(" +
subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")"; subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
} }
@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
private final PriorityQueue<IntervalIterator> queue; private final PriorityQueue<IntervalIterator> queue;
private final IntervalIterator[] subIterators; private final IntervalIterator[] subIterators;
private final int[] innerPositions; private final int[] innerPositions;
private final boolean allowOverlaps;
int start = -1, end = -1, firstEnd, queueEnd; int start = -1, end = -1, firstEnd, queueEnd;
UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) { UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
super(subIterators); super(subIterators);
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) { this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
@Override @Override
@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
}; };
this.subIterators = new IntervalIterator[subIterators.size()]; this.subIterators = new IntervalIterator[subIterators.size()];
this.innerPositions = new int[subIterators.size() * 2]; this.innerPositions = new int[subIterators.size() * 2];
this.allowOverlaps = allowOverlaps;
for (int i = 0; i < subIterators.size(); i++) { for (int i = 0; i < subIterators.size(); i++) {
this.subIterators[i] = subIterators.get(i); this.subIterators[i] = subIterators.get(i);
@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
while (this.queue.size() == subIterators.length && queue.top().start() == start) { while (this.queue.size() == subIterators.length && queue.top().start() == start) {
IntervalIterator it = queue.pop(); IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
return start = end = IntervalIterator.NO_MORE_INTERVALS;
}
}
queue.add(it); queue.add(it);
updateRightExtreme(it); updateRightExtreme(it);
} }
@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return start; return start;
IntervalIterator it = queue.pop(); IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
return start;
}
}
}
queue.add(it); queue.add(it);
updateRightExtreme(it); updateRightExtreme(it);
} }
@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
protected void reset() throws IOException { protected void reset() throws IOException {
queueEnd = start = end = -1; queueEnd = start = end = -1;
this.queue.clear(); this.queue.clear();
loop: for (IntervalIterator it : subIterators) { for (IntervalIterator it : subIterators) {
if (it.nextInterval() == NO_MORE_INTERVALS) { if (it.nextInterval() == NO_MORE_INTERVALS) {
break; break;
} }
if (allowOverlaps == false) {
while (hasOverlaps(it)) {
if (it.nextInterval() == NO_MORE_INTERVALS) {
break loop;
}
}
}
queue.add(it); queue.add(it);
updateRightExtreme(it); updateRightExtreme(it);
} }
} }
private boolean hasOverlaps(IntervalIterator candidate) {
for (IntervalIterator it : queue) {
if (it.start() < candidate.start()) {
if (it.end() >= candidate.start()) {
return true;
}
continue;
}
if (it.start() == candidate.start()) {
return true;
}
if (it.start() <= candidate.end()) {
return true;
}
}
return false;
}
} }
} }

View File

@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase {
"w2 w1 w3 w2 w4", "w2 w1 w3 w2 w4",
"coordinate genome mapping research", "coordinate genome mapping research",
"coordinate genome research", "coordinate genome research",
"greater new york" "greater new york",
"x x x x x intend x x x message x x x message x x x addressed x x"
}; };
private void checkHits(Query query, int[] results) throws IOException { private void checkHits(Query query, int[] results) throws IOException {
@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase {
checkHits(q, new int[]{3}); checkHits(q, new int[]{3});
} }
public void testUnorderedNoOverlaps() throws IOException {
Query q = new IntervalQuery(field,
Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"),
Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"),
Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message"))))))));
checkHits(q, new int[]{ 9 });
q = new IntervalQuery(field, Intervals.unorderedNoOverlaps(
Intervals.term("w2"),
Intervals.or(Intervals.term("w2"), Intervals.term("w3"))));
checkHits(q, new int[]{ 0, 1, 2, 3, 5 });
}
public void testNestedOrInUnorderedMaxGaps() throws IOException { public void testNestedOrInUnorderedMaxGaps() throws IOException {
Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered( Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")), Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),

View File

@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase {
} }
public void testUnorderedDistinct() throws IOException { public void testUnorderedDistinct() throws IOException {
checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")), checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")),
"field1", 3, new int[][]{ "field1", 3, new int[][]{
{}, {},
{ 0, 3, 3, 6 }, { 0, 3, 3, 6 },
@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3, 3, 6 }, { 0, 3, 3, 6 },
{} {}
}); });
checkIntervals(Intervals.unordered(false, checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")), Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")), Intervals.term("porridge")),
"field1", 3, new int[][]{ "field1", 3, new int[][]{
{}, {},
{ 1, 4, 4, 17 }, { 1, 4, 2, 7, 4, 17 },
{ 1, 5, 4, 7 }, { 1, 5, 4, 7 },
{}, {},
{ 1, 4, 4, 17 }, { 1, 4, 2, 7, 4, 17 },
{} {}
}); });
checkIntervals(Intervals.unordered(false, checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")), Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")), Intervals.term("porridge")),
"field2", 1, new int[][]{ "field2", 1, new int[][]{
@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3 }, { 0, 3 },
{} {}
}); });
checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.term("porridge"),
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{
{},
{ 1, 4, 4, 7 },
{ 1, 4, 4, 7 },
{},
{ 1, 4, 4, 7 },
{}
});
} }
public void testContainedBy() throws IOException { public void testContainedBy() throws IOException {

View File

@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase {
} }
public void testUnorderedOverlaps() { public void testUnorderedOverlaps() {
// UNORDERED_NO_OVERLAPS(term) => term // UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term)
IntervalsSource actual = Intervals.unordered(false, Intervals.term("term")); IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term"));
assertEquals(Intervals.term("term"), actual); assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual);
}
public void testDisjunctionSingleton() {
IntervalsSource actual = Intervals.or(Intervals.term("a"));
assertEquals(Intervals.term("a"), actual);
} }
public void testDisjunctionRemovesDuplicates() { public void testDisjunctionRemovesDuplicates() {