From 67677d995e8fca6214844b0eb814d06138bce0a2 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 7 Jun 2019 14:45:56 +0100 Subject: [PATCH] LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource --- lucene/CHANGES.txt | 3 + .../intervals/DisjunctionIntervalsSource.java | 18 ++++- .../lucene/search/intervals/Intervals.java | 21 ++---- .../NoRewriteDisjunctionIntervalsSource.java | 33 --------- .../intervals/UnorderedIntervalsSource.java | 68 ++++--------------- .../search/intervals/TestIntervalQuery.java | 16 ++++- .../search/intervals/TestIntervals.java | 20 ++++-- .../search/intervals/TestSimplifications.java | 11 ++- 8 files changed, 75 insertions(+), 115 deletions(-) delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index cb27213764e..b476b73652a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -63,6 +63,9 @@ Bug Fixes * LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances. (Vamshi Vijay Nakkirtha via Adrien Grand) +* LUCENE-8828: Removes the buggy 'disallow overlaps' boolean from Intervals.unordered(), + and replaces it with a new Intervals.unorderedNoOverlaps() method (Alan Woodward) + Improvements * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java index 350a29d89a1..5e81d99c783 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue; class DisjunctionIntervalsSource extends IntervalsSource { final Collection subSources; + final boolean pullUpDisjunctions; - public DisjunctionIntervalsSource(Collection subSources) { + static IntervalsSource create(Collection subSources, boolean pullUpDisjunctions) { + subSources = simplify(subSources); + if (subSources.size() == 1) { + return subSources.iterator().next(); + } + return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions); + } + + private DisjunctionIntervalsSource(Collection subSources, boolean pullUpDisjunctions) { this.subSources = simplify(subSources); + this.pullUpDisjunctions = pullUpDisjunctions; } private static Collection simplify(Collection sources) { @@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource { @Override public Collection pullUpDisjunctions() { - return subSources; + if (pullUpDisjunctions) { + return subSources; + } + return Collections.singletonList(this); } static class DisjunctionIntervalIterator extends IntervalIterator { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java index 591fbb178aa..d579c6f4eb4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java @@ -138,13 +138,7 @@ public final class Intervals { * @param subSources the sources to combine */ public static IntervalsSource or(boolean rewrite, List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - if (rewrite) { - return new DisjunctionIntervalsSource(subSources); - } - return new NoRewriteDisjunctionIntervalsSource(subSources); + return DisjunctionIntervalsSource.create(subSources, rewrite); } /** @@ -227,19 +221,16 @@ public final class Intervals { * @param subSources an unordered set of {@link IntervalsSource}s */ public static IntervalsSource unordered(IntervalsSource... subSources) { - return unordered(true, subSources); + return UnorderedIntervalsSource.build(Arrays.asList(subSources)); } /** - * Create an unordered {@link IntervalsSource} + * Create an unordered {@link IntervalsSource} allowing no overlaps between subsources * - * Returns intervals in which all the subsources appear. - * - * @param subSources an unordered set of {@link IntervalsSource}s - * @param allowOverlaps whether or not the sources should be allowed to overlap in a hit + * Returns intervals in which both the subsources appear and do not overlap. */ - public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) { - return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps); + public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) { + return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a)); } /** diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java deleted file mode 100644 index e273f19913c..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search.intervals; - -import java.util.Collection; -import java.util.Collections; - -class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource { - - public NoRewriteDisjunctionIntervalsSource(Collection subSources) { - super(subSources); - } - - @Override - public Collection pullUpDisjunctions() { - return Collections.singletonList(this); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java index adf9cc06f1d..aaf0184d27b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java @@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue; class UnorderedIntervalsSource extends ConjunctionIntervalsSource { - static IntervalsSource build(List sources, boolean allowOverlaps) { + static IntervalsSource build(List sources) { if (sources.size() == 1) { return sources.get(0); } - return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps); + return new UnorderedIntervalsSource(flatten(sources)); } - private static List flatten(List sources, boolean allowOverlaps) { + private static List flatten(List sources) { List flattened = new ArrayList<>(); for (IntervalsSource s : sources) { - if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) { + if (s instanceof UnorderedIntervalsSource) { flattened.addAll(((UnorderedIntervalsSource)s).subSources); } else { @@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { return flattened; } - private final boolean allowOverlaps; - - private UnorderedIntervalsSource(List sources, boolean allowOverlaps) { + private UnorderedIntervalsSource(List sources) { super(sources, true); - this.allowOverlaps = allowOverlaps; } @Override protected IntervalIterator combine(List iterators) { - return new UnorderedIntervalIterator(iterators, allowOverlaps); + return new UnorderedIntervalIterator(iterators); } @Override @@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { @Override public Collection pullUpDisjunctions() { - return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps)); + return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new); } @Override public int hashCode() { - return Objects.hash(this.subSources, this.allowOverlaps); + return Objects.hash(this.subSources); } @Override public boolean equals(Object other) { if (other instanceof UnorderedIntervalsSource == false) return false; UnorderedIntervalsSource o = (UnorderedIntervalsSource) other; - return Objects.equals(this.subSources, o.subSources) && - Objects.equals(this.allowOverlaps, o.allowOverlaps); + return Objects.equals(this.subSources, o.subSources); } @Override public String toString() { - return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") + + return "UNORDERED(" + subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")"; } @@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { private final PriorityQueue queue; private final IntervalIterator[] subIterators; private final int[] innerPositions; - private final boolean allowOverlaps; int start = -1, end = -1, firstEnd, queueEnd; - UnorderedIntervalIterator(List subIterators, boolean allowOverlaps) { + UnorderedIntervalIterator(List subIterators) { super(subIterators); this.queue = new PriorityQueue(subIterators.size()) { @Override @@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { }; this.subIterators = new IntervalIterator[subIterators.size()]; this.innerPositions = new int[subIterators.size() * 2]; - this.allowOverlaps = allowOverlaps; for (int i = 0; i < subIterators.size(); i++) { this.subIterators[i] = subIterators.get(i); @@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { while (this.queue.size() == subIterators.length && queue.top().start() == start) { IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - if (allowOverlaps == false) { - while (hasOverlaps(it)) { - if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) - return start = end = IntervalIterator.NO_MORE_INTERVALS; - } - } queue.add(it); updateRightExtreme(it); } @@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { return start; IntervalIterator it = queue.pop(); if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) { - if (allowOverlaps == false) { - while (hasOverlaps(it)) { - if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) { - return start; - } - } - } queue.add(it); updateRightExtreme(it); } @@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource { protected void reset() throws IOException { queueEnd = start = end = -1; this.queue.clear(); - loop: for (IntervalIterator it : subIterators) { + for (IntervalIterator it : subIterators) { if (it.nextInterval() == NO_MORE_INTERVALS) { break; } - if (allowOverlaps == false) { - while (hasOverlaps(it)) { - if (it.nextInterval() == NO_MORE_INTERVALS) { - break loop; - } - } - } queue.add(it); updateRightExtreme(it); } } - private boolean hasOverlaps(IntervalIterator candidate) { - for (IntervalIterator it : queue) { - if (it.start() < candidate.start()) { - if (it.end() >= candidate.start()) { - return true; - } - continue; - } - if (it.start() == candidate.start()) { - return true; - } - if (it.start() <= candidate.end()) { - return true; - } - } - return false; - } - } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java index a8998ef4648..61757b1b496 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java @@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase { "w2 w1 w3 w2 w4", "coordinate genome mapping research", "coordinate genome research", - "greater new york" + "greater new york", + "x x x x x intend x x x message x x x message x x x addressed x x" }; private void checkHits(Query query, int[] results) throws IOException { @@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase { checkHits(q, new int[]{3}); } + public void testUnorderedNoOverlaps() throws IOException { + Query q = new IntervalQuery(field, + Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"), + Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"), + Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message")))))))); + checkHits(q, new int[]{ 9 }); + + q = new IntervalQuery(field, Intervals.unorderedNoOverlaps( + Intervals.term("w2"), + Intervals.or(Intervals.term("w2"), Intervals.term("w3")))); + checkHits(q, new int[]{ 0, 1, 2, 3, 5 }); + } + public void testNestedOrInUnorderedMaxGaps() throws IOException { Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered( Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")), diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java index ea80d5ffbbf..8bf7f8a33bb 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java @@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase { } public void testUnorderedDistinct() throws IOException { - checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")), + checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")), "field1", 3, new int[][]{ {}, { 0, 3, 3, 6 }, @@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase { { 0, 3, 3, 6 }, {} }); - checkIntervals(Intervals.unordered(false, + checkIntervals(Intervals.unorderedNoOverlaps( Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")), Intervals.term("porridge")), "field1", 3, new int[][]{ {}, - { 1, 4, 4, 17 }, + { 1, 4, 2, 7, 4, 17 }, { 1, 5, 4, 7 }, {}, - { 1, 4, 4, 17 }, + { 1, 4, 2, 7, 4, 17 }, {} }); - checkIntervals(Intervals.unordered(false, + checkIntervals(Intervals.unorderedNoOverlaps( Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")), Intervals.term("porridge")), "field2", 1, new int[][]{ @@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase { { 0, 3 }, {} }); + checkIntervals(Intervals.unorderedNoOverlaps( + Intervals.term("porridge"), + Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{ + {}, + { 1, 4, 4, 7 }, + { 1, 4, 4, 7 }, + {}, + { 1, 4, 4, 7 }, + {} + }); } public void testContainedBy() throws IOException { diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java index 7f64cd4f832..c97dfbb9946 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java @@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase { } public void testUnorderedOverlaps() { - // UNORDERED_NO_OVERLAPS(term) => term - IntervalsSource actual = Intervals.unordered(false, Intervals.term("term")); - assertEquals(Intervals.term("term"), actual); + // UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term) + IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term")); + assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual); + } + + public void testDisjunctionSingleton() { + IntervalsSource actual = Intervals.or(Intervals.term("a")); + assertEquals(Intervals.term("a"), actual); } public void testDisjunctionRemovesDuplicates() {