mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
LUCENE-8477: Automatically rewrite disjunctions when internal gaps matter (#620)
We have a number of IntervalsSource implementations where automatic minimization of disjunctions can lead to surprising results: (1) PHRASE queries can miss matches because a longer matching sub-source is minimized away, leaving a gap; (2) MAXGAPS queries can miss matches for the same reason; (3) CONTAINING, NOT_CONTAINING, CONTAINED_BY and NOT_CONTAINED_BY queries can miss matches if the 'big' interval gets minimized. The proper way to deal with this is to rewrite the queries by pulling disjunctions to the top of the query tree, so that PHRASE("a", OR(PHRASE("b", "c"), "c")) is rewritten to OR(PHRASE("a", "b", "c"), PHRASE("a", "c")). To be able to do this generally, we need to add a new pullUpDisjunctions() method to IntervalsSource that performs this rewriting for each source that it would apply to. Because these rewritten queries will in general be less efficient due to the duplication of effort (e.g. the rewritten PHRASE query above pulls 5 term iterators rather than 4 in the original), we also add an option to Intervals.or() that will prevent this from happening, so that consumers can choose speed over accuracy if it suits their use case.
This commit is contained in:
parent
e7939d5907
commit
f1782d0dd1
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
class BlockIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> subSources) {
|
||||
if (subSources.size() == 1) {
|
||||
return subSources.get(0);
|
||||
}
|
||||
return Intervals.or(Disjunctions.pullUp(subSources, BlockIntervalsSource::new));
|
||||
}
|
||||
|
||||
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
|
||||
List<IntervalsSource> flattened = new ArrayList<>();
|
||||
for (IntervalsSource s : sources) {
|
||||
if (s instanceof BlockIntervalsSource) {
|
||||
flattened.addAll(((BlockIntervalsSource)s).subSources);
|
||||
}
|
||||
else {
|
||||
flattened.add(s);
|
||||
}
|
||||
}
|
||||
return flattened;
|
||||
}
|
||||
|
||||
private BlockIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(flatten(sources), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new BlockIntervalIterator(iterators);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
int minExtent = 0;
|
||||
for (IntervalsSource subSource : subSources) {
|
||||
minExtent += subSource.minExtent();
|
||||
}
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this); // Disjunctions already pulled up in build()
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof BlockIntervalsSource == false) return false;
|
||||
BlockIntervalsSource b = (BlockIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, b.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK(" + subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
|
||||
}
|
||||
|
||||
private static class BlockIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
int start = -1, end = -1;
|
||||
|
||||
BlockIntervalIterator(List<IntervalIterator> subIterators) {
|
||||
super(subIterators);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
int i = 1;
|
||||
while (i < subIterators.size()) {
|
||||
while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) {
|
||||
if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) {
|
||||
i = i + 1;
|
||||
}
|
||||
else {
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
i = 1;
|
||||
}
|
||||
}
|
||||
start = subIterators.get(0).start();
|
||||
end = subIterators.get(subIterators.size() - 1).end();
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() {
|
||||
start = end = -1;
|
||||
}
|
||||
}
|
||||
}
|
@ -20,7 +20,6 @@ package org.apache.lucene.search.intervals;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
@ -31,28 +30,15 @@ import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
|
||||
class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
abstract class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
protected final List<IntervalsSource> subSources;
|
||||
protected final IntervalFunction function;
|
||||
protected final boolean isMinimizing;
|
||||
|
||||
ConjunctionIntervalsSource(List<IntervalsSource> subSources, IntervalFunction function) {
|
||||
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources, boolean isMinimizing) {
|
||||
assert subSources.size() > 1;
|
||||
this.subSources = subSources;
|
||||
this.function = function;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
ConjunctionIntervalsSource that = (ConjunctionIntervalsSource) o;
|
||||
return Objects.equals(subSources, that.subSources) &&
|
||||
Objects.equals(function, that.function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return function + subSources.stream().map(Object::toString).collect(Collectors.joining(",", "(", ")"));
|
||||
this.isMinimizing = isMinimizing;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -65,16 +51,7 @@ class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
int minExtent = 0;
|
||||
for (IntervalsSource source : subSources) {
|
||||
minExtent += source.minExtent();
|
||||
}
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
public final IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
List<IntervalIterator> subIntervals = new ArrayList<>();
|
||||
for (IntervalsSource source : subSources) {
|
||||
IntervalIterator it = source.intervals(field, ctx);
|
||||
@ -82,32 +59,32 @@ class ConjunctionIntervalsSource extends IntervalsSource {
|
||||
return null;
|
||||
subIntervals.add(it);
|
||||
}
|
||||
return function.apply(subIntervals);
|
||||
return combine(subIntervals);
|
||||
}
|
||||
|
||||
protected abstract IntervalIterator combine(List<IntervalIterator> iterators);
|
||||
|
||||
@Override
|
||||
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
List<MatchesIterator> subs = new ArrayList<>();
|
||||
for (IntervalsSource source : subSources) {
|
||||
MatchesIterator mi = source.matches(field, ctx, doc);
|
||||
if (mi == null) {
|
||||
return null;
|
||||
}
|
||||
if (isMinimizing) {
|
||||
mi = new CachingMatchesIterator(mi);
|
||||
}
|
||||
subs.add(mi);
|
||||
}
|
||||
IntervalIterator it = function.apply(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
|
||||
IntervalIterator it = combine(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
|
||||
if (it.advance(doc) != doc) {
|
||||
return null;
|
||||
}
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return null;
|
||||
}
|
||||
return new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(subSources, function);
|
||||
return isMinimizing ? new MinimizingConjunctionMatchesIterator(it, subs) : new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
|
||||
private static class ConjunctionMatchesIterator implements MatchesIterator {
|
||||
|
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
class ContainedByIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(IntervalsSource small, IntervalsSource big) {
|
||||
return Intervals.or(Disjunctions.pullUp(big, s -> new ContainedByIntervalsSource(small, s)));
|
||||
}
|
||||
|
||||
private final IntervalsSource small;
|
||||
private final IntervalsSource big;
|
||||
|
||||
private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
|
||||
super(Arrays.asList(small, big), false);
|
||||
this.small = small;
|
||||
this.big = big;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
assert iterators.size() == 2;
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.end()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
if (b.start() <= a.start())
|
||||
return a.start();
|
||||
}
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return small.minExtent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(big, s -> new ContainedByIntervalsSource(small, s));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof ContainedByIntervalsSource == false) return false;
|
||||
ContainedByIntervalsSource o = (ContainedByIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, o.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CONTAINED_BY(" + small + "," + big + ")";
|
||||
}
|
||||
}
|
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
class ContainingIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
private final IntervalsSource big;
|
||||
private final IntervalsSource small;
|
||||
|
||||
static IntervalsSource build(IntervalsSource big, IntervalsSource small) {
|
||||
return Intervals.or(Disjunctions.pullUp(big, s -> new ContainingIntervalsSource(s, small)));
|
||||
}
|
||||
|
||||
private ContainingIntervalsSource(IntervalsSource big, IntervalsSource small) {
|
||||
super(Arrays.asList(big, small), false);
|
||||
this.big = big;
|
||||
this.small = small;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
assert iterators.size() == 2;
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.start() < a.start() && b.end() < a.end()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
if (a.start() <= b.start() && a.end() >= b.end())
|
||||
return a.start();
|
||||
}
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return big.minExtent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(big, s -> new ContainingIntervalsSource(s, small));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(this.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof ContainingIntervalsSource == false) return false;
|
||||
ConjunctionIntervalsSource o = (ContainingIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, o.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CONTAINING(" + big + "," + small + ")";
|
||||
}
|
||||
}
|
@ -1,235 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A function that takes two interval iterators and combines them to produce a third,
|
||||
* generally by computing a difference interval between them
|
||||
*/
|
||||
abstract class DifferenceIntervalFunction {
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
@Override
|
||||
public abstract boolean equals(Object obj);
|
||||
|
||||
@Override
|
||||
public abstract String toString();
|
||||
|
||||
/**
|
||||
* Combine two interval iterators into a third
|
||||
*/
|
||||
public abstract IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend);
|
||||
|
||||
/**
|
||||
* Filters the minuend iterator so that only intervals that do not overlap intervals from the
|
||||
* subtrahend iterator are returned
|
||||
*/
|
||||
static final DifferenceIntervalFunction NON_OVERLAPPING = new SingletonFunction("NON_OVERLAPPING") {
|
||||
@Override
|
||||
public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NonOverlappingIterator(minuend, subtrahend);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Filters the minuend iterator so that only intervals that do not contain intervals from the
|
||||
* subtrahend iterator are returned
|
||||
*/
|
||||
static final DifferenceIntervalFunction NOT_CONTAINING = new SingletonFunction("NOT_CONTAINING") {
|
||||
@Override
|
||||
public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NotContainingIterator(minuend, subtrahend);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Filters the minuend iterator so that only intervals that are not contained by intervals from
|
||||
* the subtrahend iterator are returned
|
||||
*/
|
||||
static final DifferenceIntervalFunction NOT_CONTAINED_BY = new SingletonFunction("NOT_CONTAINED_BY") {
|
||||
@Override
|
||||
public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NotContainedByIterator(minuend, subtrahend);
|
||||
}
|
||||
};
|
||||
|
||||
private static abstract class RelativeIterator extends IntervalIterator {
|
||||
|
||||
final IntervalIterator a;
|
||||
final IntervalIterator b;
|
||||
|
||||
boolean bpos;
|
||||
|
||||
RelativeIterator(IntervalIterator a, IntervalIterator b) {
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return a.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
int doc = a.nextDoc();
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
int doc = a.advance(target);
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return a.cost();
|
||||
}
|
||||
|
||||
protected void reset() throws IOException {
|
||||
int doc = a.docID();
|
||||
bpos = b.docID() == doc ||
|
||||
(b.docID() < doc && b.advance(doc) == doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return a.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return a.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return a.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return a.matchCost() + b.matchCost();
|
||||
}
|
||||
}
|
||||
|
||||
private static class NonOverlappingIterator extends RelativeIterator {
|
||||
|
||||
private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.start()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return a.start();
|
||||
}
|
||||
}
|
||||
if (b.start() > a.end())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
|
||||
private static class NotContainingIterator extends RelativeIterator {
|
||||
|
||||
private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.start() < a.start() && b.end() < a.end()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return a.start();
|
||||
}
|
||||
}
|
||||
if (b.start() > a.end())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class NotContainedByIterator extends RelativeIterator {
|
||||
|
||||
NotContainedByIterator(IntervalIterator a, IntervalIterator b) {
|
||||
super(a, b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.end()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS)
|
||||
return a.start();
|
||||
}
|
||||
if (a.start() < b.start())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
|
||||
private static abstract class SingletonFunction extends DifferenceIntervalFunction {
|
||||
|
||||
private final String name;
|
||||
|
||||
SingletonFunction(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return obj == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -18,38 +18,37 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
|
||||
class DifferenceIntervalsSource extends IntervalsSource {
|
||||
abstract class DifferenceIntervalsSource extends IntervalsSource {
|
||||
|
||||
private final IntervalsSource minuend;
|
||||
private final IntervalsSource subtrahend;
|
||||
private final DifferenceIntervalFunction function;
|
||||
final IntervalsSource minuend;
|
||||
final IntervalsSource subtrahend;
|
||||
|
||||
DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend, DifferenceIntervalFunction function) {
|
||||
DifferenceIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
this.minuend = minuend;
|
||||
this.subtrahend = subtrahend;
|
||||
this.function = function;
|
||||
}
|
||||
|
||||
protected abstract IntervalIterator combine(IntervalIterator minuend, IntervalIterator subtrahend);
|
||||
|
||||
@Override
|
||||
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
public final IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
IntervalIterator minIt = minuend.intervals(field, ctx);
|
||||
if (minIt == null)
|
||||
return null;
|
||||
IntervalIterator subIt = subtrahend.intervals(field, ctx);
|
||||
if (subIt == null)
|
||||
return minIt;
|
||||
return function.apply(minIt, subIt);
|
||||
return combine(minIt, subIt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
MatchesIterator minIt = minuend.matches(field, ctx, doc);
|
||||
if (minIt == null) {
|
||||
return null;
|
||||
@ -58,30 +57,10 @@ class DifferenceIntervalsSource extends IntervalsSource {
|
||||
if (subIt == null) {
|
||||
return minIt;
|
||||
}
|
||||
IntervalIterator difference = function.apply(IntervalMatches.wrapMatches(minIt, doc), IntervalMatches.wrapMatches(subIt, doc));
|
||||
IntervalIterator difference = combine(IntervalMatches.wrapMatches(minIt, doc), IntervalMatches.wrapMatches(subIt, doc));
|
||||
return IntervalMatches.asMatches(difference, minIt, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
DifferenceIntervalsSource that = (DifferenceIntervalsSource) o;
|
||||
return Objects.equals(minuend, that.minuend) &&
|
||||
Objects.equals(subtrahend, that.subtrahend) &&
|
||||
Objects.equals(function, that.function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(minuend, subtrahend, function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return function + "(" + minuend + ", " + subtrahend + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(String field, QueryVisitor visitor) {
|
||||
IntervalQuery q = new IntervalQuery(field, this);
|
||||
|
@ -19,8 +19,11 @@ package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
@ -34,10 +37,23 @@ import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
class DisjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
final List<IntervalsSource> subSources;
|
||||
final Collection<IntervalsSource> subSources;
|
||||
|
||||
public DisjunctionIntervalsSource(List<IntervalsSource> subSources) {
|
||||
this.subSources = subSources;
|
||||
public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
|
||||
this.subSources = simplify(subSources);
|
||||
}
|
||||
|
||||
private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
|
||||
Set<IntervalsSource> simplified = new HashSet<>();
|
||||
for (IntervalsSource source : sources) {
|
||||
if (source instanceof DisjunctionIntervalsSource) {
|
||||
simplified.addAll(source.pullUpDisjunctions());
|
||||
}
|
||||
else {
|
||||
simplified.add(source);
|
||||
}
|
||||
}
|
||||
return simplified;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -95,13 +111,18 @@ class DisjunctionIntervalsSource extends IntervalsSource {
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
int minExtent = subSources.get(0).minExtent();
|
||||
for (int i = 1; i < subSources.size(); i++) {
|
||||
minExtent = Math.min(minExtent, subSources.get(i).minExtent());
|
||||
int minExtent = Integer.MAX_VALUE;
|
||||
for (IntervalsSource subSource : subSources) {
|
||||
minExtent = Math.min(minExtent, subSource.minExtent());
|
||||
}
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return subSources;
|
||||
}
|
||||
|
||||
static class DisjunctionIntervalIterator extends IntervalIterator {
|
||||
|
||||
final DocIdSetIterator approximation;
|
||||
|
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
|
||||
final class Disjunctions {
|
||||
|
||||
// Given a list of sources that contain disjunctions, and a combiner function,
|
||||
// pulls the disjunctions to the top of the source tree
|
||||
|
||||
// eg FUNC(a, b, OR(c, "d e")) => [FUNC(a, b, c), FUNC(a, b, "d e")]
|
||||
|
||||
public static List<IntervalsSource> pullUp(List<IntervalsSource> sources,
|
||||
Function<List<IntervalsSource>, IntervalsSource> function) {
|
||||
|
||||
List<List<IntervalsSource>> rewritten = new ArrayList<>();
|
||||
rewritten.add(new ArrayList<>());
|
||||
for (IntervalsSource source : sources) {
|
||||
List<IntervalsSource> disjuncts = splitDisjunctions(source);
|
||||
if (disjuncts.size() == 1) {
|
||||
rewritten.forEach(l -> l.add(disjuncts.get(0)));
|
||||
}
|
||||
else {
|
||||
if (rewritten.size() * disjuncts.size() > BooleanQuery.getMaxClauseCount()) {
|
||||
throw new IllegalArgumentException("Too many disjunctions to expand");
|
||||
}
|
||||
List<List<IntervalsSource>> toAdd = new ArrayList<>();
|
||||
for (IntervalsSource disj : disjuncts) {
|
||||
// clone the rewritten list, then append the disjunct
|
||||
for (List<IntervalsSource> subList : rewritten) {
|
||||
List<IntervalsSource> l = new ArrayList<>(subList);
|
||||
l.add(disj);
|
||||
toAdd.add(l);
|
||||
}
|
||||
}
|
||||
rewritten = toAdd;
|
||||
}
|
||||
}
|
||||
if (rewritten.size() == 1) {
|
||||
return Collections.singletonList(function.apply(rewritten.get(0)));
|
||||
}
|
||||
return rewritten.stream().map(function).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
// Given a source containing disjunctions, and a mapping function,
|
||||
// pulls the disjunctions to the top of the source tree
|
||||
public static List<IntervalsSource> pullUp(IntervalsSource source, Function<IntervalsSource, IntervalsSource> function) {
|
||||
List<IntervalsSource> disjuncts = splitDisjunctions(source);
|
||||
if (disjuncts.size() == 1) {
|
||||
return Collections.singletonList(function.apply(disjuncts.get(0)));
|
||||
}
|
||||
return disjuncts.stream().map(function).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
// Separate out disjunctions into individual sources
|
||||
// Clauses that have a minExtent of 1 are grouped together and treated as a single
|
||||
// source, as any overlapping intervals of length 1 can be treated as identical,
|
||||
// and we know that all combinatorial sources have a minExtent > 1
|
||||
private static List<IntervalsSource> splitDisjunctions(IntervalsSource source) {
|
||||
List<IntervalsSource> singletons = new ArrayList<>();
|
||||
List<IntervalsSource> nonSingletons = new ArrayList<>();
|
||||
for (IntervalsSource disj : source.pullUpDisjunctions()) {
|
||||
if (disj.minExtent() == 1) {
|
||||
singletons.add(disj);
|
||||
}
|
||||
else {
|
||||
nonSingletons.add(disj);
|
||||
}
|
||||
}
|
||||
List<IntervalsSource> split = new ArrayList<>();
|
||||
if (singletons.size() > 0) {
|
||||
split.add(Intervals.or(singletons.toArray(new IntervalsSource[0])));
|
||||
}
|
||||
split.addAll(nonSingletons);
|
||||
return split;
|
||||
}
|
||||
|
||||
}
|
@ -18,7 +18,10 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
@ -69,6 +72,15 @@ class ExtendedIntervalsSource extends IntervalsSource {
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
Collection<IntervalsSource> inner = source.pullUpDisjunctions();
|
||||
if (inner.size() == 0) {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
return inner.stream().map(s -> new ExtendedIntervalsSource(s, before, after)).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -18,7 +18,10 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
@ -29,15 +32,58 @@ import org.apache.lucene.search.QueryVisitor;
|
||||
*/
|
||||
public abstract class FilteredIntervalsSource extends IntervalsSource {
|
||||
|
||||
public static IntervalsSource maxGaps(IntervalsSource in, int maxGaps) {
|
||||
return Intervals.or(in.pullUpDisjunctions().stream().map(s -> new MaxGaps(s, maxGaps)).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
private static class MaxGaps extends FilteredIntervalsSource {
|
||||
|
||||
private final int maxGaps;
|
||||
|
||||
MaxGaps(IntervalsSource in, int maxGaps) {
|
||||
super("MAXGAPS/" + maxGaps, in);
|
||||
this.maxGaps = maxGaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean accept(IntervalIterator it) {
|
||||
return it.gaps() <= maxGaps;
|
||||
}
|
||||
}
|
||||
|
||||
public static IntervalsSource maxWidth(IntervalsSource in, int maxWidth) {
|
||||
return new MaxWidth(in, maxWidth);
|
||||
}
|
||||
|
||||
private static class MaxWidth extends FilteredIntervalsSource {
|
||||
|
||||
private final int maxWidth;
|
||||
|
||||
MaxWidth(IntervalsSource in, int maxWidth) {
|
||||
super("MAXWIDTH/" + maxWidth, in);
|
||||
this.maxWidth = maxWidth;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean accept(IntervalIterator it) {
|
||||
return (it.end() - it.start()) + 1 <= maxWidth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(in, s -> new MaxWidth(s, maxWidth));
|
||||
}
|
||||
}
|
||||
|
||||
private final String name;
|
||||
private final IntervalsSource in;
|
||||
protected final IntervalsSource in;
|
||||
|
||||
/**
|
||||
* Create a new FilteredIntervalsSource
|
||||
* @param name the name of the filter
|
||||
* @param in the source to filter
|
||||
*/
|
||||
public FilteredIntervalsSource(String name, IntervalsSource in) {
|
||||
private FilteredIntervalsSource(String name, IntervalsSource in) {
|
||||
this.name = name;
|
||||
this.in = in;
|
||||
}
|
||||
@ -81,6 +127,11 @@ public abstract class FilteredIntervalsSource extends IntervalsSource {
|
||||
return in.minExtent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(String field, QueryVisitor visitor) {
|
||||
in.visit(field, visitor);
|
||||
@ -89,7 +140,7 @@ public abstract class FilteredIntervalsSource extends IntervalsSource {
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
if (o == null || o instanceof FilteredIntervalsSource == false) return false;
|
||||
FilteredIntervalsSource that = (FilteredIntervalsSource) o;
|
||||
return Objects.equals(name, that.name) &&
|
||||
Objects.equals(in, that.in);
|
||||
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
abstract class FilteringIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
final IntervalIterator a;
|
||||
final IntervalIterator b;
|
||||
|
||||
boolean bpos;
|
||||
|
||||
protected FilteringIntervalIterator(IntervalIterator a, IntervalIterator b) {
|
||||
super(Arrays.asList(a, b));
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
if (bpos == false) {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
return a.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
if (bpos == false) {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
return a.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return a.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
bpos = b.nextInterval() != NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
@ -18,7 +18,10 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
@ -54,6 +57,15 @@ class FixedFieldIntervalsSource extends IntervalsSource {
|
||||
return source.minExtent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
Collection<IntervalsSource> inner = source.pullUpDisjunctions();
|
||||
if (inner.size() == 1) {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
return inner.stream().map(s -> new FixedFieldIntervalsSource(field, s)).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -85,7 +85,7 @@ public abstract class IntervalFilter extends IntervalIterator {
|
||||
do {
|
||||
next = in.nextInterval();
|
||||
}
|
||||
while (accept() == false && next != IntervalIterator.NO_MORE_INTERVALS);
|
||||
while (next != IntervalIterator.NO_MORE_INTERVALS && accept() == false);
|
||||
return next;
|
||||
}
|
||||
|
||||
|
@ -1,501 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Combine a list of {@link IntervalIterator}s into another
|
||||
*/
|
||||
abstract class IntervalFunction {
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
@Override
|
||||
public abstract boolean equals(Object obj);
|
||||
|
||||
@Override
|
||||
public abstract String toString();
|
||||
|
||||
/**
|
||||
* Combine the iterators into another iterator
|
||||
*/
|
||||
public abstract IntervalIterator apply(List<IntervalIterator> iterators);
|
||||
|
||||
static final IntervalFunction BLOCK = new SingletonFunction("BLOCK") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> iterators) {
|
||||
return new BlockIntervalIterator(iterators);
|
||||
}
|
||||
};
|
||||
|
||||
private static class BlockIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
int start = -1, end = -1;
|
||||
|
||||
BlockIntervalIterator(List<IntervalIterator> subIterators) {
|
||||
super(subIterators);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
int i = 1;
|
||||
while (i < subIterators.size()) {
|
||||
while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) {
|
||||
if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) {
|
||||
i = i + 1;
|
||||
}
|
||||
else {
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
i = 1;
|
||||
}
|
||||
}
|
||||
start = subIterators.get(0).start();
|
||||
end = subIterators.get(subIterators.size() - 1).end();
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() {
|
||||
start = end = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an iterator over intervals where the subiterators appear in a given order
|
||||
*/
|
||||
static final IntervalFunction ORDERED = new SingletonFunction("ORDERED") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> intervalIterators) {
|
||||
return new OrderedIntervalIterator(intervalIterators);
|
||||
}
|
||||
};
|
||||
|
||||
private static class OrderedIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
int start = -1, end = -1, i;
|
||||
int firstEnd;
|
||||
|
||||
private OrderedIntervalIterator(List<IntervalIterator> subIntervals) {
|
||||
super(subIntervals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
int b = Integer.MAX_VALUE;
|
||||
i = 1;
|
||||
while (true) {
|
||||
while (true) {
|
||||
if (subIterators.get(i - 1).end() >= b)
|
||||
return start;
|
||||
if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end())
|
||||
break;
|
||||
do {
|
||||
if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start;
|
||||
}
|
||||
while (subIterators.get(i).start() <= subIterators.get(i - 1).end());
|
||||
i++;
|
||||
}
|
||||
start = subIterators.get(0).start();
|
||||
if (start == NO_MORE_INTERVALS) {
|
||||
return end = NO_MORE_INTERVALS;
|
||||
}
|
||||
firstEnd = subIterators.get(0).end();
|
||||
end = subIterators.get(subIterators.size() - 1).end();
|
||||
b = subIterators.get(subIterators.size() - 1).start();
|
||||
i = 1;
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
int gaps = subIterators.get(1).start() - firstEnd - 1;
|
||||
for (int i = 2; i < subIterators.size(); i++) {
|
||||
gaps += (subIterators.get(i).start() - subIterators.get(i - 1).end() - 1);
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
subIterators.get(0).nextInterval();
|
||||
i = 1;
|
||||
start = end = firstEnd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an iterator over intervals where the subiterators appear in any order
|
||||
*/
|
||||
static final IntervalFunction UNORDERED = new SingletonFunction("UNORDERED") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> intervalIterators) {
|
||||
return new UnorderedIntervalIterator(intervalIterators, true);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Return an iterator over intervals where the subiterators appear in any order, and do not overlap
|
||||
*/
|
||||
static final IntervalFunction UNORDERED_NO_OVERLAP = new SingletonFunction("UNORDERED_NO_OVERLAP") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> iterators) {
|
||||
return new UnorderedIntervalIterator(iterators, false);
|
||||
}
|
||||
};
|
||||
|
||||
private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
private final PriorityQueue<IntervalIterator> queue;
|
||||
private final IntervalIterator[] subIterators;
|
||||
private final int[] innerPositions;
|
||||
private final boolean allowOverlaps;
|
||||
|
||||
int start = -1, end = -1, firstEnd, queueEnd;
|
||||
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
|
||||
super(subIterators);
|
||||
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
|
||||
return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end());
|
||||
}
|
||||
};
|
||||
this.subIterators = new IntervalIterator[subIterators.size()];
|
||||
this.innerPositions = new int[subIterators.size() * 2];
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
|
||||
for (int i = 0; i < subIterators.size(); i++) {
|
||||
this.subIterators[i] = subIterators.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
void updateRightExtreme(IntervalIterator it) {
|
||||
int itEnd = it.end();
|
||||
if (itEnd > queueEnd) {
|
||||
queueEnd = itEnd;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
// first, find a matching interval
|
||||
while (this.queue.size() == subIterators.length && queue.top().start() == start) {
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
}
|
||||
if (this.queue.size() < subIterators.length)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
// then, minimize it
|
||||
do {
|
||||
start = queue.top().start();
|
||||
firstEnd = queue.top().end();
|
||||
end = queueEnd;
|
||||
if (queue.top().end() == end)
|
||||
return start;
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
} while (this.queue.size() == subIterators.length && end == queueEnd);
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
for (int i = 0; i < subIterators.length; i++) {
|
||||
if (subIterators[i].end() > end) {
|
||||
innerPositions[i * 2] = start;
|
||||
innerPositions[i * 2 + 1] = firstEnd;
|
||||
}
|
||||
else {
|
||||
innerPositions[i * 2] = subIterators[i].start();
|
||||
innerPositions[i * 2 + 1] = subIterators[i].end();
|
||||
}
|
||||
}
|
||||
Arrays.sort(innerPositions);
|
||||
int gaps = 0;
|
||||
for (int i = 1; i < subIterators.length; i++) {
|
||||
gaps += (innerPositions[i * 2] - innerPositions[i * 2 - 1] - 1);
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
queueEnd = start = end = -1;
|
||||
this.queue.clear();
|
||||
loop: for (IntervalIterator it : subIterators) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break;
|
||||
}
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasOverlaps(IntervalIterator candidate) {
|
||||
for (IntervalIterator it : queue) {
|
||||
if (it.start() < candidate.start()) {
|
||||
if (it.end() >= candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (it.start() == candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
if (it.start() <= candidate.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an interval over iterators where the first iterator contains intervals from the second
|
||||
*/
|
||||
static final IntervalFunction CONTAINING = new SingletonFunction("CONTAINING") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> iterators) {
|
||||
if (iterators.size() != 2)
|
||||
throw new IllegalStateException("CONTAINING function requires two iterators");
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.start() < a.start() && b.end() < a.end()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
if (a.start() <= b.start() && a.end() >= b.end())
|
||||
return a.start();
|
||||
}
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Return an iterator over intervals where the first iterator is contained by intervals from the second
|
||||
*/
|
||||
static final IntervalFunction CONTAINED_BY = new SingletonFunction("CONTAINED_BY") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> iterators) {
|
||||
if (iterators.size() != 2)
|
||||
throw new IllegalStateException("CONTAINED_BY function requires two iterators");
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.end()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
if (b.start() <= a.start())
|
||||
return a.start();
|
||||
}
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
static final IntervalFunction OVERLAPPING = new SingletonFunction("OVERLAPPING") {
|
||||
@Override
|
||||
public IntervalIterator apply(List<IntervalIterator> iterators) {
|
||||
if (iterators.size() != 2)
|
||||
throw new IllegalStateException("OVERLAPPING function requires two iterators");
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.start()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
if (b.start() <= a.end())
|
||||
return a.start();
|
||||
}
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
private static abstract class FilteringIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
final IntervalIterator a;
|
||||
final IntervalIterator b;
|
||||
|
||||
boolean bpos;
|
||||
|
||||
protected FilteringIntervalIterator(IntervalIterator a, IntervalIterator b) {
|
||||
super(Arrays.asList(a, b));
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
if (bpos == false) {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
return a.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
if (bpos == false) {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
return a.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return a.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
bpos = b.nextInterval() != NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
|
||||
private static abstract class SingletonFunction extends IntervalFunction {
|
||||
|
||||
private final String name;
|
||||
|
||||
protected SingletonFunction(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return obj == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -18,6 +18,7 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
@ -32,6 +33,15 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
* These sources implement minimum-interval algorithms taken from the paper
|
||||
* <a href="http://vigna.di.unimi.it/ftp/papers/EfficientAlgorithmsMinimalIntervalSemantics.pdf">
|
||||
* Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics</a>
|
||||
*
|
||||
* By default, sources that are sensitive to internal gaps (e.g. PHRASE and MAXGAPS) will
|
||||
* rewrite their sub-sources so that disjunctions of different lengths are pulled up
|
||||
* to the top of the interval tree. For example, PHRASE(or(PHRASE("a", "b", "c"), "b"), "c")
|
||||
* will automatically rewrite itself to OR(PHRASE("a", "b", "c", "c"), PHRASE("b", "c"))
|
||||
* to ensure that documents containing "b c" are matched. This can lead to less efficient
|
||||
* queries, as more terms need to be loaded (for example, the "c" iterator above is loaded
|
||||
* twice), so if you care more about speed than about accuracy you can use the
|
||||
* {@link #or(boolean, IntervalsSource...)} factory method to prevent rewriting.
|
||||
*/
|
||||
public final class Intervals {
|
||||
|
||||
@ -87,19 +97,54 @@ public final class Intervals {
|
||||
* Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of IntervalsSources
|
||||
*/
|
||||
public static IntervalsSource phrase(IntervalsSource... subSources) {
|
||||
if (subSources.length == 1) {
|
||||
return subSources[0];
|
||||
}
|
||||
return new ConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.BLOCK);
|
||||
return BlockIntervalsSource.build(Arrays.asList(subSources));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an {@link IntervalsSource} over the disjunction of a set of sub-sources
|
||||
*
|
||||
* Automatically rewrites if wrapped by an interval source that is sensitive to
|
||||
* internal gaps
|
||||
*/
|
||||
public static IntervalsSource or(IntervalsSource... subSources) {
|
||||
return or(true, Arrays.asList(subSources));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an {@link IntervalsSource} over the disjunction of a set of sub-sources
|
||||
*
|
||||
* @param rewrite if {@code false}, do not rewrite intervals that are sensitive to
|
||||
* internal gaps; this may run more efficiently, but can miss valid
|
||||
* hits due to minimization
|
||||
* @param subSources the sources to combine
|
||||
*/
|
||||
public static IntervalsSource or(boolean rewrite, IntervalsSource... subSources) {
|
||||
return or(rewrite, Arrays.asList(subSources));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an {@link IntervalsSource} over the disjunction of a set of sub-sources
|
||||
*/
|
||||
public static IntervalsSource or(IntervalsSource... subSources) {
|
||||
if (subSources.length == 1)
|
||||
return subSources[0];
|
||||
return new DisjunctionIntervalsSource(Arrays.asList(subSources));
|
||||
public static IntervalsSource or(List<IntervalsSource> subSources) {
|
||||
return or(true, subSources);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an {@link IntervalsSource} over the disjunction of a set of sub-sources
|
||||
*
|
||||
* @param rewrite if {@code false}, do not rewrite intervals that are sensitive to
|
||||
* internal gaps; this may run more efficiently, but can miss valid
|
||||
* hits due to minimization
|
||||
* @param subSources the sources to combine
|
||||
*/
|
||||
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
|
||||
if (subSources.size() == 1) {
|
||||
return subSources.get(0);
|
||||
}
|
||||
if (rewrite) {
|
||||
return new DisjunctionIntervalsSource(subSources);
|
||||
}
|
||||
return new NoRewriteDisjunctionIntervalsSource(subSources);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -130,12 +175,7 @@ public final class Intervals {
|
||||
* @param subSource the sub-source to filter
|
||||
*/
|
||||
public static IntervalsSource maxwidth(int width, IntervalsSource subSource) {
|
||||
return new FilteredIntervalsSource("MAXWIDTH/" + width, subSource) {
|
||||
@Override
|
||||
protected boolean accept(IntervalIterator it) {
|
||||
return (it.end() - it.start()) + 1 <= width;
|
||||
}
|
||||
};
|
||||
return FilteredIntervalsSource.maxWidth(subSource, width);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -144,12 +184,7 @@ public final class Intervals {
|
||||
* @param subSource the sub-source to filter
|
||||
*/
|
||||
public static IntervalsSource maxgaps(int gaps, IntervalsSource subSource) {
|
||||
return new FilteredIntervalsSource("MAXGAPS/" + gaps, subSource) {
|
||||
@Override
|
||||
protected boolean accept(IntervalIterator it) {
|
||||
return it.gaps() <= gaps;
|
||||
}
|
||||
};
|
||||
return FilteredIntervalsSource.maxGaps(subSource, gaps);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -181,10 +216,7 @@ public final class Intervals {
|
||||
* @param subSources an ordered set of {@link IntervalsSource} objects
|
||||
*/
|
||||
public static IntervalsSource ordered(IntervalsSource... subSources) {
|
||||
if (subSources.length == 1) {
|
||||
return subSources[0];
|
||||
}
|
||||
return new MinimizingConjunctionIntervalsSource(Arrays.asList(subSources), IntervalFunction.ORDERED);
|
||||
return OrderedIntervalsSource.build(Arrays.asList(subSources));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -207,11 +239,7 @@ public final class Intervals {
|
||||
* @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
|
||||
*/
|
||||
public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) {
|
||||
if (subSources.length == 1) {
|
||||
return subSources[0];
|
||||
}
|
||||
return new MinimizingConjunctionIntervalsSource(Arrays.asList(subSources),
|
||||
allowOverlaps ? IntervalFunction.UNORDERED : IntervalFunction.UNORDERED_NO_OVERLAP);
|
||||
return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -234,7 +262,7 @@ public final class Intervals {
|
||||
* @param subtrahend the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource nonOverlapping(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NON_OVERLAPPING);
|
||||
return new NonOverlappingIntervalsSource(minuend, subtrahend);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -243,7 +271,7 @@ public final class Intervals {
|
||||
* @param reference the source to filter by
|
||||
*/
|
||||
public static IntervalsSource overlapping(IntervalsSource source, IntervalsSource reference) {
|
||||
return new FilteringConjunctionIntervalsSource(source, reference, IntervalFunction.OVERLAPPING);
|
||||
return new OverlappingIntervalsSource(source, reference);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -258,8 +286,7 @@ public final class Intervals {
|
||||
* @param subtrahend the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) {
|
||||
return new DifferenceIntervalsSource(minuend, Intervals.extend(subtrahend, positions, positions),
|
||||
DifferenceIntervalFunction.NON_OVERLAPPING);
|
||||
return new NonOverlappingIntervalsSource(minuend, Intervals.extend(subtrahend, positions, positions));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -283,7 +310,7 @@ public final class Intervals {
|
||||
* @param subtrahend the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource notContaining(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
return new DifferenceIntervalsSource(minuend, subtrahend, DifferenceIntervalFunction.NOT_CONTAINING);
|
||||
return NotContainingIntervalsSource.build(minuend, subtrahend);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -296,7 +323,7 @@ public final class Intervals {
|
||||
* @param small the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource containing(IntervalsSource big, IntervalsSource small) {
|
||||
return new FilteringConjunctionIntervalsSource(big, small, IntervalFunction.CONTAINING);
|
||||
return ContainingIntervalsSource.build(big, small);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -309,7 +336,7 @@ public final class Intervals {
|
||||
* @param big the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource notContainedBy(IntervalsSource small, IntervalsSource big) {
|
||||
return new DifferenceIntervalsSource(small, big, DifferenceIntervalFunction.NOT_CONTAINED_BY);
|
||||
return NotContainedByIntervalsSource.build(small, big);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -321,7 +348,7 @@ public final class Intervals {
|
||||
* @param big the {@link IntervalsSource} to filter by
|
||||
*/
|
||||
public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource big) {
|
||||
return new FilteringConjunctionIntervalsSource(small, big, IntervalFunction.CONTAINED_BY);
|
||||
return ContainedByIntervalsSource.build(small, big);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -335,18 +362,16 @@ public final class Intervals {
|
||||
* Returns intervals from the source that appear before intervals from the reference
|
||||
*/
|
||||
public static IntervalsSource before(IntervalsSource source, IntervalsSource reference) {
|
||||
return new FilteringConjunctionIntervalsSource(source,
|
||||
Intervals.extend(new OffsetIntervalsSource(reference, true), Integer.MAX_VALUE, 0),
|
||||
IntervalFunction.CONTAINED_BY);
|
||||
return ContainedByIntervalsSource.build(source,
|
||||
Intervals.extend(new OffsetIntervalsSource(reference, true), Integer.MAX_VALUE, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns intervals from the source that appear after intervals from the reference
|
||||
*/
|
||||
public static IntervalsSource after(IntervalsSource source, IntervalsSource reference) {
|
||||
return new FilteringConjunctionIntervalsSource(source,
|
||||
Intervals.extend(new OffsetIntervalsSource(reference, false), 0, Integer.MAX_VALUE),
|
||||
IntervalFunction.CONTAINED_BY);
|
||||
return ContainedByIntervalsSource.build(source,
|
||||
Intervals.extend(new OffsetIntervalsSource(reference, false), 0, Integer.MAX_VALUE));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
@ -64,6 +65,13 @@ public abstract class IntervalsSource {
|
||||
*/
|
||||
public abstract int minExtent();
|
||||
|
||||
/**
|
||||
* Expert: return the set of disjunctions that make up this IntervalsSource
|
||||
*
|
||||
* Most implementations can return {@code Collections.singleton(this)}
|
||||
*/
|
||||
public abstract Collection<IntervalsSource> pullUpDisjunctions();
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
|
@ -1,133 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* A ConjunctionIntervalsSource that attempts to minimize its internal intervals by
|
||||
* eagerly advancing its first subinterval
|
||||
*
|
||||
* Uses caching to expose matches after its first subinterval has been moved on
|
||||
*/
|
||||
class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
MinimizingConjunctionIntervalsSource(List<IntervalsSource> subSources, IntervalFunction function) {
|
||||
super(subSources, function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
List<CachingMatchesIterator> subs = new ArrayList<>();
|
||||
for (IntervalsSource source : subSources) {
|
||||
MatchesIterator mi = source.matches(field, ctx, doc);
|
||||
if (mi == null) {
|
||||
return null;
|
||||
}
|
||||
subs.add(new CachingMatchesIterator(mi));
|
||||
}
|
||||
IntervalIterator it = function.apply(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
|
||||
if (it.advance(doc) != doc) {
|
||||
return null;
|
||||
}
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return null;
|
||||
}
|
||||
return new ConjunctionMatchesIterator(it, subs);
|
||||
}
|
||||
|
||||
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
|
||||
final IntervalIterator iterator;
|
||||
final List<CachingMatchesIterator> subs;
|
||||
boolean cached = true;
|
||||
|
||||
private ConjunctionMatchesIterator(IntervalIterator iterator, List<CachingMatchesIterator> subs) {
|
||||
this.iterator = iterator;
|
||||
this.subs = subs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (cached) {
|
||||
cached = false;
|
||||
return true;
|
||||
}
|
||||
return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return iterator.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return iterator.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
int start = Integer.MAX_VALUE;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
start = Math.min(start, s.startOffset(endPos));
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
int end = 0;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
end = Math.max(end, s.endOffset(endPos));
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return iterator.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> mis = new ArrayList<>();
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
mis.add(s.getSubMatches(endPos));
|
||||
}
|
||||
return MatchesUtils.disjunction(mis);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
class MinimizingConjunctionMatchesIterator implements IntervalMatchesIterator {
|
||||
|
||||
final IntervalIterator iterator;
|
||||
private final List<CachingMatchesIterator> subs = new ArrayList<>();
|
||||
private boolean cached = true;
|
||||
|
||||
MinimizingConjunctionMatchesIterator(IntervalIterator iterator, List<MatchesIterator> subs) {
|
||||
this.iterator = iterator;
|
||||
for (MatchesIterator mi : subs) {
|
||||
assert mi instanceof CachingMatchesIterator;
|
||||
this.subs.add((CachingMatchesIterator)mi);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (cached) {
|
||||
cached = false;
|
||||
return true;
|
||||
}
|
||||
return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return iterator.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return iterator.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
int start = Integer.MAX_VALUE;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
start = Math.min(start, s.startOffset(endPos));
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
int end = 0;
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
end = Math.max(end, s.endOffset(endPos));
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return iterator.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
List<MatchesIterator> mis = new ArrayList<>();
|
||||
int endPos = endPosition();
|
||||
for (CachingMatchesIterator s : subs) {
|
||||
mis.add(s.getSubMatches(endPos));
|
||||
}
|
||||
return MatchesUtils.disjunction(mis);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -107,6 +108,11 @@ class MinimumShouldMatchIntervalsSource extends IntervalsSource {
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AtLeast("
|
||||
|
@ -19,6 +19,8 @@ package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@ -97,6 +99,11 @@ class MultiTermIntervalsSource extends IntervalsSource {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -17,23 +17,17 @@
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* An intervals source that combines two other sources, requiring both of them to
|
||||
* be present in order to match, but using the minExtent of one of them
|
||||
*/
|
||||
class FilteringConjunctionIntervalsSource extends ConjunctionIntervalsSource {
|
||||
class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
|
||||
|
||||
private final IntervalsSource source;
|
||||
|
||||
FilteringConjunctionIntervalsSource(IntervalsSource source, IntervalsSource filter, IntervalFunction function) {
|
||||
super(Arrays.asList(source, filter), function);
|
||||
this.source = source;
|
||||
public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
|
||||
super(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return source.minExtent();
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
class NonOverlappingIntervalsSource extends DifferenceIntervalsSource {
|
||||
|
||||
NonOverlappingIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NonOverlappingIterator(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof NonOverlappingIntervalsSource == false) return false;
|
||||
NonOverlappingIntervalsSource o = (NonOverlappingIntervalsSource) other;
|
||||
return Objects.equals(this.minuend, o.minuend) && Objects.equals(this.subtrahend, o.subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NON_OVERLAPPING(" + minuend + "," + subtrahend + ")";
|
||||
}
|
||||
|
||||
private static class NonOverlappingIterator extends RelativeIterator {
|
||||
|
||||
private NonOverlappingIterator(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.start()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return a.start();
|
||||
}
|
||||
}
|
||||
if (b.start() > a.end())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
class NotContainedByIntervalsSource extends DifferenceIntervalsSource {
|
||||
|
||||
static IntervalsSource build(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
return Intervals.or(Disjunctions.pullUp(subtrahend, s -> new NotContainedByIntervalsSource(minuend, s)));
|
||||
}
|
||||
|
||||
private NotContainedByIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NotContainedByIterator(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof NotContainedByIntervalsSource == false) return false;
|
||||
NotContainedByIntervalsSource o = (NotContainedByIntervalsSource) other;
|
||||
return Objects.equals(this.minuend, o.minuend) && Objects.equals(this.subtrahend, o.subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NOT_CONTAINED_BY(" + minuend + "," + subtrahend + ")";
|
||||
}
|
||||
|
||||
private static class NotContainedByIterator extends RelativeIterator {
|
||||
|
||||
NotContainedByIterator(IntervalIterator a, IntervalIterator b) {
|
||||
super(a, b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.end()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS)
|
||||
return a.start();
|
||||
}
|
||||
if (a.start() < b.start())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
class NotContainingIntervalsSource extends DifferenceIntervalsSource {
|
||||
|
||||
static IntervalsSource build(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
return Intervals.or(Disjunctions.pullUp(minuend, s -> new NotContainingIntervalsSource(s, subtrahend)));
|
||||
}
|
||||
|
||||
private NotContainingIntervalsSource(IntervalsSource minuend, IntervalsSource subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
return new NotContainingIterator(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singletonList(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof NotContainingIntervalsSource == false) return false;
|
||||
NotContainingIntervalsSource o = (NotContainingIntervalsSource) other;
|
||||
return Objects.equals(this.minuend, o.minuend) && Objects.equals(this.subtrahend, o.subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NOT_CONTAINING(" + minuend + "," + subtrahend + ")";
|
||||
}
|
||||
|
||||
private static class NotContainingIterator extends RelativeIterator {
|
||||
|
||||
private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) {
|
||||
super(minuend, subtrahend);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return a.nextInterval();
|
||||
while (a.nextInterval() != NO_MORE_INTERVALS) {
|
||||
while (b.start() < a.start() && b.end() < a.end()) {
|
||||
if (b.nextInterval() == NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return a.start();
|
||||
}
|
||||
}
|
||||
if (b.start() > a.end())
|
||||
return a.start();
|
||||
}
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -18,6 +18,8 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
@ -152,6 +154,11 @@ class OffsetIntervalsSource extends IntervalsSource {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
class OrderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> sources) {
|
||||
if (sources.size() == 1) {
|
||||
return sources.get(0);
|
||||
}
|
||||
return new OrderedIntervalsSource(flatten(sources));
|
||||
}
|
||||
|
||||
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
|
||||
List<IntervalsSource> flattened = new ArrayList<>();
|
||||
for (IntervalsSource s : sources) {
|
||||
if (s instanceof OrderedIntervalsSource) {
|
||||
flattened.addAll(((OrderedIntervalsSource)s).subSources);
|
||||
}
|
||||
else {
|
||||
flattened.add(s);
|
||||
}
|
||||
}
|
||||
return flattened;
|
||||
}
|
||||
|
||||
private OrderedIntervalsSource(List<IntervalsSource> sources) {
|
||||
super(sources, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new OrderedIntervalIterator(iterators);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
int minExtent = 0;
|
||||
for (IntervalsSource subSource : subSources) {
|
||||
minExtent += subSource.minExtent();
|
||||
}
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(subSources, OrderedIntervalsSource::new);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof OrderedIntervalsSource == false) return false;
|
||||
OrderedIntervalsSource s = (OrderedIntervalsSource) other;
|
||||
return Objects.equals(subSources, s.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ORDERED(" + subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
|
||||
}
|
||||
|
||||
private static class OrderedIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
int start = -1, end = -1, i;
|
||||
int firstEnd;
|
||||
|
||||
private OrderedIntervalIterator(List<IntervalIterator> subIntervals) {
|
||||
super(subIntervals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
int b = Integer.MAX_VALUE;
|
||||
i = 1;
|
||||
while (true) {
|
||||
while (true) {
|
||||
if (subIterators.get(i - 1).end() >= b)
|
||||
return start;
|
||||
if (i == subIterators.size() || subIterators.get(i).start() > subIterators.get(i - 1).end())
|
||||
break;
|
||||
do {
|
||||
if (subIterators.get(i).end() >= b || subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start;
|
||||
}
|
||||
while (subIterators.get(i).start() <= subIterators.get(i - 1).end());
|
||||
i++;
|
||||
}
|
||||
start = subIterators.get(0).start();
|
||||
if (start == NO_MORE_INTERVALS) {
|
||||
return end = NO_MORE_INTERVALS;
|
||||
}
|
||||
firstEnd = subIterators.get(0).end();
|
||||
end = subIterators.get(subIterators.size() - 1).end();
|
||||
b = subIterators.get(subIterators.size() - 1).start();
|
||||
i = 1;
|
||||
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
int gaps = subIterators.get(1).start() - firstEnd - 1;
|
||||
for (int i = 2; i < subIterators.size(); i++) {
|
||||
gaps += (subIterators.get(i).start() - subIterators.get(i - 1).end() - 1);
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
subIterators.get(0).nextInterval();
|
||||
i = 1;
|
||||
start = end = firstEnd = -1;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
class OverlappingIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
private final IntervalsSource source;
|
||||
private final IntervalsSource reference;
|
||||
|
||||
OverlappingIntervalsSource(IntervalsSource source, IntervalsSource reference) {
|
||||
super(Arrays.asList(source, reference), false);
|
||||
this.source = source;
|
||||
this.reference = reference;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
assert iterators.size() == 2;
|
||||
IntervalIterator a = iterators.get(0);
|
||||
IntervalIterator b = iterators.get(1);
|
||||
return new FilteringIntervalIterator(a, b) {
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (bpos == false)
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
while (b.end() < a.start()) {
|
||||
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
if (b.start() <= a.end())
|
||||
return a.start();
|
||||
}
|
||||
bpos = false;
|
||||
return IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return source.minExtent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(Arrays.asList(source, reference), ss -> new OverlappingIntervalsSource(ss.get(0), ss.get(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(this.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof OverlappingIntervalsSource == false) return false;
|
||||
OverlappingIntervalsSource o = (OverlappingIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, o.subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OVERLAPPING(" + source + "," + reference + ")";
|
||||
}
|
||||
}
|
@ -18,6 +18,8 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
@ -224,6 +226,11 @@ class PayloadFilteredTermIntervalsSource extends IntervalsSource {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(term);
|
||||
|
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
abstract class RelativeIterator extends IntervalIterator {
|
||||
|
||||
final IntervalIterator a;
|
||||
final IntervalIterator b;
|
||||
|
||||
boolean bpos;
|
||||
|
||||
RelativeIterator(IntervalIterator a, IntervalIterator b) {
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return a.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
int doc = a.nextDoc();
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
int doc = a.advance(target);
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return a.cost();
|
||||
}
|
||||
|
||||
protected void reset() throws IOException {
|
||||
int doc = a.docID();
|
||||
bpos = b.docID() == doc ||
|
||||
(b.docID() < doc && b.advance(doc) == doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return a.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return a.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return a.gaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return a.matchCost() + b.matchCost();
|
||||
}
|
||||
}
|
@ -18,6 +18,8 @@
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
|
||||
@ -208,6 +210,11 @@ class TermIntervalsSource extends IntervalsSource {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(term);
|
||||
|
@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
|
||||
|
||||
static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
if (sources.size() == 1) {
|
||||
return sources.get(0);
|
||||
}
|
||||
return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps);
|
||||
}
|
||||
|
||||
private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
List<IntervalsSource> flattened = new ArrayList<>();
|
||||
for (IntervalsSource s : sources) {
|
||||
if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) {
|
||||
flattened.addAll(((UnorderedIntervalsSource)s).subSources);
|
||||
}
|
||||
else {
|
||||
flattened.add(s);
|
||||
}
|
||||
}
|
||||
return flattened;
|
||||
}
|
||||
|
||||
private final boolean allowOverlaps;
|
||||
|
||||
private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
|
||||
super(sources, true);
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IntervalIterator combine(List<IntervalIterator> iterators) {
|
||||
return new UnorderedIntervalIterator(iterators, allowOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
int minExtent = 0;
|
||||
for (IntervalsSource subSource : subSources) {
|
||||
minExtent += subSource.minExtent();
|
||||
}
|
||||
return minExtent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(this.subSources, this.allowOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof UnorderedIntervalsSource == false) return false;
|
||||
UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
|
||||
return Objects.equals(this.subSources, o.subSources) &&
|
||||
Objects.equals(this.allowOverlaps, o.allowOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") +
|
||||
subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
|
||||
}
|
||||
|
||||
private static class UnorderedIntervalIterator extends ConjunctionIntervalIterator {
|
||||
|
||||
private final PriorityQueue<IntervalIterator> queue;
|
||||
private final IntervalIterator[] subIterators;
|
||||
private final int[] innerPositions;
|
||||
private final boolean allowOverlaps;
|
||||
|
||||
int start = -1, end = -1, firstEnd, queueEnd;
|
||||
|
||||
UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
|
||||
super(subIterators);
|
||||
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
|
||||
return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end());
|
||||
}
|
||||
};
|
||||
this.subIterators = new IntervalIterator[subIterators.size()];
|
||||
this.innerPositions = new int[subIterators.size() * 2];
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
|
||||
for (int i = 0; i < subIterators.size(); i++) {
|
||||
this.subIterators[i] = subIterators.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
void updateRightExtreme(IntervalIterator it) {
|
||||
int itEnd = it.end();
|
||||
if (itEnd > queueEnd) {
|
||||
queueEnd = itEnd;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
// first, find a matching interval
|
||||
while (this.queue.size() == subIterators.length && queue.top().start() == start) {
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
}
|
||||
if (this.queue.size() < subIterators.length)
|
||||
return start = end = IntervalIterator.NO_MORE_INTERVALS;
|
||||
// then, minimize it
|
||||
do {
|
||||
start = queue.top().start();
|
||||
firstEnd = queue.top().end();
|
||||
end = queueEnd;
|
||||
if (queue.top().end() == end)
|
||||
return start;
|
||||
IntervalIterator it = queue.pop();
|
||||
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
} while (this.queue.size() == subIterators.length && end == queueEnd);
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
for (int i = 0; i < subIterators.length; i++) {
|
||||
if (subIterators[i].end() > end) {
|
||||
innerPositions[i * 2] = start;
|
||||
innerPositions[i * 2 + 1] = firstEnd;
|
||||
}
|
||||
else {
|
||||
innerPositions[i * 2] = subIterators[i].start();
|
||||
innerPositions[i * 2 + 1] = subIterators[i].end();
|
||||
}
|
||||
}
|
||||
Arrays.sort(innerPositions);
|
||||
int gaps = 0;
|
||||
for (int i = 1; i < subIterators.length; i++) {
|
||||
gaps += (innerPositions[i * 2] - innerPositions[i * 2 - 1] - 1);
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reset() throws IOException {
|
||||
queueEnd = start = end = -1;
|
||||
this.queue.clear();
|
||||
loop: for (IntervalIterator it : subIterators) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break;
|
||||
}
|
||||
if (allowOverlaps == false) {
|
||||
while (hasOverlaps(it)) {
|
||||
if (it.nextInterval() == NO_MORE_INTERVALS) {
|
||||
break loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
queue.add(it);
|
||||
updateRightExtreme(it);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasOverlaps(IntervalIterator candidate) {
|
||||
for (IntervalIterator it : queue) {
|
||||
if (it.start() < candidate.start()) {
|
||||
if (it.end() >= candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (it.start() == candidate.start()) {
|
||||
return true;
|
||||
}
|
||||
if (it.start() <= candidate.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.intervals;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import static org.apache.lucene.search.intervals.Intervals.*;
|
||||
|
||||
public class TestDisjunctionRewrites extends LuceneTestCase {
|
||||
|
||||
public void testDisjunctionSuffix() {
|
||||
|
||||
// BLOCK(a,or(b, BLOCK(b, c))) => or(BLOCK(a, b), BLOCK(a, b, c))
|
||||
IntervalsSource actual = Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.or(Intervals.term("b"), Intervals.phrase("b", "c")));
|
||||
IntervalsSource expected = Intervals.or(
|
||||
Intervals.phrase("a", "b"),
|
||||
Intervals.phrase("a", "b", "c"));
|
||||
assertEquals(expected, actual);
|
||||
|
||||
}
|
||||
|
||||
public void testPhraseDisjunctionWithDifferentLengthClauses() {
|
||||
|
||||
// BLOCK(a, or(b, BLOCK(b, c)), d) => or(BLOCK(a, b, d), BLOCK(a, b, c, d))
|
||||
|
||||
IntervalsSource actual = Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.or(Intervals.term("b"), Intervals.phrase(Intervals.term("b"), Intervals.term("c"))),
|
||||
Intervals.term("d"));
|
||||
|
||||
IntervalsSource expected = Intervals.or(
|
||||
Intervals.phrase("a", "b", "d"),
|
||||
Intervals.phrase("a", "b", "c", "d")
|
||||
);
|
||||
|
||||
assertEquals(expected, actual);
|
||||
|
||||
}
|
||||
|
||||
public void testPhraseDisjunctionWithNestedDifferentLengthClauses() {
|
||||
|
||||
// BLOCK(a, or(ORDERED(or(b, c), d), b, p, q), f, g)
|
||||
// => or(BLOCK(a, or(b, p, q), f, g), BLOCK(a, ORDERED(or(b, c), d), f, g))
|
||||
|
||||
IntervalsSource expected = Intervals.or(
|
||||
Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.or(Intervals.term("b"), Intervals.term("p"), Intervals.term("q")),
|
||||
Intervals.term("f"),
|
||||
Intervals.term("g")),
|
||||
Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.ordered(Intervals.or(Intervals.term("b"), Intervals.term("c")), Intervals.term("d")),
|
||||
Intervals.term("f"),
|
||||
Intervals.term("g")
|
||||
)
|
||||
);
|
||||
|
||||
IntervalsSource actual = Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.or(
|
||||
Intervals.ordered(Intervals.or(Intervals.term("b"), Intervals.term("c")), Intervals.term("d")),
|
||||
Intervals.term("b"),
|
||||
Intervals.term("p"),
|
||||
Intervals.term("q")
|
||||
),
|
||||
Intervals.term("f"),
|
||||
Intervals.term("g")
|
||||
);
|
||||
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testDisjunctionRewritePreservesFilters() {
|
||||
|
||||
// BLOCK(a, MAXGAPS/3(OR(BLOCK(a, b), BLOCK(c, d))), c)
|
||||
// => or(BLOCK(a, MAXGAPS/3(BLOCK(a, b)), c), BLOCK(a, MAXGAPS/3(BLOCK(c, d)), c))
|
||||
|
||||
IntervalsSource actual = Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.maxgaps(3, Intervals.or(
|
||||
Intervals.phrase("a", "b"),
|
||||
Intervals.phrase("c", "d")
|
||||
)),
|
||||
Intervals.term("c")
|
||||
);
|
||||
|
||||
IntervalsSource expected = Intervals.or(
|
||||
Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.maxgaps(3, Intervals.phrase("a", "b")),
|
||||
Intervals.term("c")
|
||||
),
|
||||
Intervals.phrase(
|
||||
Intervals.term("a"),
|
||||
Intervals.maxgaps(3, Intervals.phrase("c", "d")),
|
||||
Intervals.term("c")
|
||||
));
|
||||
|
||||
assertEquals(expected, actual);
|
||||
|
||||
}
|
||||
|
||||
public void testNestedMaxGaps() {
|
||||
// MAXGAPS/3(ORDERED(MAXGAPS/4(ORDERED(a, or(b, BLOCK(c, d)))), e)
|
||||
// => or(MAXGAPS/3(ORDERED(MAXGAPS/4(ORDERED(a, b)), e)), MAXGAPS/3(ORDERED(MAXGAPS/4(ORDERED(a, BLOCK(c, d))), e)))
|
||||
IntervalsSource actual = maxgaps(3, ordered(
|
||||
maxgaps(4, ordered(term("a"), or(term("b"), phrase("c", "d")))), term("e")));
|
||||
IntervalsSource expected = or(
|
||||
maxgaps(3, ordered(maxgaps(4, ordered(term("a"), term("b"))), term("e"))),
|
||||
maxgaps(3, ordered(maxgaps(4, ordered(term("a"), phrase("c", "d"))), term("e")))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testNestedMaxWidth() {
|
||||
// maxwidth does not automatically pull up disjunctions at construction time, so we need to check
|
||||
// that it does the right thing if wrapped by something that does need exact internal accounting
|
||||
// PHRASE(a, MAXWIDTH(4, OR(ORDERED(b, c), ORDERED(d, e))), f)
|
||||
// => or(PHRASE(a, MAXWIDTH(4, ORDERED(b, c)), f), PHRASE(a, MAXWIDTH(4, ORDERED(d, e)), f))
|
||||
IntervalsSource actual = phrase(term("a"), maxwidth(4, or(ordered(term("b"), term("c")), ordered(term("d"), term("e")))), term("f"));
|
||||
IntervalsSource expected = or(
|
||||
phrase(term("a"), maxwidth(4, ordered(term("b"), term("c"))), term("f")),
|
||||
phrase(term("a"), maxwidth(4, ordered(term("d"), term("e"))), term("f"))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testNestedFixField() {
|
||||
// PHRASE(a, FIXFIELD(field, or(PHRASE(a, b), b)), c)
|
||||
// => or(PHRASE(a, FIXFIELD(PHRASE(a, b)), c), PHRASE(a, FIXFIELD(b), c))
|
||||
IntervalsSource actual = phrase(term("a"), fixField("field", or(phrase("a", "b"), term("b"))), term("c"));
|
||||
IntervalsSource expected = or(
|
||||
phrase(term("a"), fixField("field", phrase("a", "b")), term("c")),
|
||||
phrase(term("a"), fixField("field", term("b")), term("c"))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testContainedBy() {
|
||||
// the 'big' interval should not be minimized, the 'small' one should be
|
||||
// CONTAINED_BY(or("s", BLOCK("s", "t")), MAXGAPS/4(or(ORDERED("a", "b"), ORDERED("c", "d"))))
|
||||
// => or(CONTAINED_BY(or("s", BLOCK("s", "t")), MAXGAPS/4(ORDERED("a", "b"))),
|
||||
// CONTAINED_BY(or("s", BLOCK("s", "t")), MAXGAPS/4(ORDERED("c", "d"))))
|
||||
IntervalsSource actual = containedBy(or(term("s"), phrase("s", "t")),
|
||||
maxgaps(4, or(ordered(term("a"), term("b")), ordered(term("c"), term("d")))));
|
||||
IntervalsSource expected = or(
|
||||
containedBy(or(term("s"), phrase("s", "t")), maxgaps(4, ordered(term("a"), term("b")))),
|
||||
containedBy(or(term("s"), phrase("s", "t")), maxgaps(4, ordered(term("c"), term("d"))))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testContaining() {
|
||||
// the 'big' interval should not be minimized, the 'small' one should be
|
||||
// CONTAINING(MAXGAPS/4(or(ORDERED("a", "b"), ORDERED("c", "d"))), or("s", BLOCK("s", "t")))
|
||||
// => or(CONTAINING(MAXGAPS/4(ORDERED("a", "b")), or("s", BLOCK("s", "t"))),
|
||||
// CONTAINING(MAXGAPS/4(ORDERED("c", "d")), or("s", BLOCK("s", "t"))))
|
||||
IntervalsSource actual = containing(maxgaps(4, or(ordered(term("a"), term("b")), ordered(term("c"), term("d")))),
|
||||
or(term("s"), phrase("s", "t")));
|
||||
IntervalsSource expected = or(
|
||||
containing(maxgaps(4, ordered(term("a"), term("b"))), or(term("s"), phrase("s", "t"))),
|
||||
containing(maxgaps(4, ordered(term("c"), term("d"))), or(term("s"), phrase("s", "t")))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testNotContainedBy() {
|
||||
// the 'big' interval should not be minimized, the 'small' one should be
|
||||
// NOT_CONTAINED_BY(or(BLOCK("a", "b"), "a"), or(BLOCK("c", "d"), "d"))
|
||||
// => or(NOT_CONTAINED_BY(or(BLOCK("a", "b"), "a"), BLOCK("c", "d"))), NOT_CONTAINED_BY(or(BLOCK("a", "b"), "a"), "d"))
|
||||
IntervalsSource actual = notContainedBy(or(phrase("a", "b"), term("a")), or(phrase("c", "d"), term("d")));
|
||||
IntervalsSource expected = or(
|
||||
notContainedBy(or(phrase("a", "b"), term("a")), phrase("c", "d")),
|
||||
notContainedBy(or(phrase("a", "b"), term("a")), term("d"))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testNotContaining() {
|
||||
// the 'big' interval should not be minimized, the 'small' one should be
|
||||
// NOT_CONTAINING(or(BLOCK("a", "b"), "a"), or(BLOCK("c", "d"), "d"))
|
||||
// => or(NOT_CONTAINING(BLOCK("a", "b"), or(BLOCK("c", "d"), "d")), NOT_CONTAINING("a", or(BLOCK("c", "d"), "d")))
|
||||
IntervalsSource actual = notContaining(or(phrase("a", "b"), term("a")), or(phrase("c", "d"), term("d")));
|
||||
IntervalsSource expected = or(
|
||||
notContaining(phrase("a", "b"), or(phrase("c", "d"), term("d"))),
|
||||
notContaining(term("a"), or(phrase("c", "d"), term("d")))
|
||||
);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testBlockedRewrites() {
|
||||
IntervalsSource actual = phrase(term("a"), or(false, phrase("b", "c"), term("c")));
|
||||
IntervalsSource ifRewritten = or(phrase("a", "b", "c"), phrase("a", "c"));
|
||||
assertNotEquals(ifRewritten, actual);
|
||||
}
|
||||
|
||||
}
|
@ -31,7 +31,6 @@ import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Ignore;
|
||||
|
||||
public class TestIntervalQuery extends LuceneTestCase {
|
||||
|
||||
@ -71,7 +70,8 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||
"w2 w1",
|
||||
"w2 w1 w3 w2 w4",
|
||||
"coordinate genome mapping research",
|
||||
"coordinate genome research"
|
||||
"coordinate genome research",
|
||||
"greater new york"
|
||||
};
|
||||
|
||||
private void checkHits(Query query, int[] results) throws IOException {
|
||||
@ -166,10 +166,6 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||
checkHits(q, new int[]{});
|
||||
}
|
||||
|
||||
// The Vigna paper doesn't deal with prefix disjunctions. For now, we keep the same
|
||||
// logic as detailed in the paper, but we may want to address it in future so that tests
|
||||
// like the one below will pass
|
||||
@Ignore
|
||||
public void testNestedOr() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.phrase(
|
||||
Intervals.term("coordinate"),
|
||||
@ -178,6 +174,39 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
}
|
||||
|
||||
public void testNestedOrWithGaps() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.phrase(
|
||||
Intervals.term("coordinate"),
|
||||
Intervals.or(Intervals.term("genome"), Intervals.extend(Intervals.term("mapping"), 1, 0)),
|
||||
Intervals.term("research")));
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
}
|
||||
|
||||
public void testNestedOrWithinDifference() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.phrase(
|
||||
Intervals.term("coordinate"),
|
||||
Intervals.notContaining(
|
||||
Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")),
|
||||
Intervals.term("wibble")),
|
||||
Intervals.term("research")));
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
}
|
||||
|
||||
public void testNestedOrWithinConjunctionFilter() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.phrase(
|
||||
Intervals.term("coordinate"),
|
||||
Intervals.containing(
|
||||
Intervals.or(Intervals.phrase("genome", "mapping"), Intervals.term("genome")),
|
||||
Intervals.term("genome")),
|
||||
Intervals.term("research")));
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
|
||||
q = new IntervalQuery(field, Intervals.phrase(
|
||||
Intervals.term("greater"),
|
||||
Intervals.or(Intervals.phrase("new", "york"), Intervals.term("york"))));
|
||||
checkHits(q, new int[]{ 8 });
|
||||
}
|
||||
|
||||
public void testUnordered() throws IOException {
|
||||
Query q = new IntervalQuery(field,
|
||||
Intervals.unordered(
|
||||
@ -191,6 +220,21 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||
checkHits(q, new int[]{3});
|
||||
}
|
||||
|
||||
public void testNestedOrInUnorderedMaxGaps() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
|
||||
Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),
|
||||
Intervals.term("research"))
|
||||
));
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
}
|
||||
|
||||
public void testNestedOrInContainedBy() throws IOException {
|
||||
Query q = new IntervalQuery(field, Intervals.containedBy(
|
||||
Intervals.term("genome"),
|
||||
Intervals.or(Intervals.term("coordinate"), Intervals.ordered(Intervals.term("coordinate"), Intervals.term("research")))));
|
||||
checkHits(q, new int[]{ 6, 7 });
|
||||
}
|
||||
|
||||
public void testDefinedGaps() throws IOException {
|
||||
Query q = new IntervalQuery(field,
|
||||
Intervals.phrase(Intervals.term("w1"), Intervals.extend(Intervals.term("w2"), 1, 0)));
|
||||
|
@ -118,12 +118,12 @@ public class TestIntervals extends LuceneTestCase {
|
||||
if (i >= expected[id].length) {
|
||||
fail("Unexpected match in doc " + id + ": " + intervals);
|
||||
}
|
||||
assertEquals("Wrong start value in doc " + id, expected[id][i], pos);
|
||||
assertEquals(source + ": wrong start value in doc " + id, expected[id][i], pos);
|
||||
assertEquals("start() != pos returned from nextInterval()", expected[id][i], intervals.start());
|
||||
assertEquals("Wrong end value in doc " + id, expected[id][i + 1], intervals.end());
|
||||
i += 2;
|
||||
}
|
||||
assertEquals("Wrong number of endpoints in doc " + id, expected[id].length, i);
|
||||
assertEquals(source + ": wrong number of endpoints in doc " + id, expected[id].length, i);
|
||||
assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.start());
|
||||
assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.end());
|
||||
if (i > 0)
|
||||
@ -762,4 +762,22 @@ public class TestIntervals extends LuceneTestCase {
|
||||
assertMatch(mi, 17, 17, 97, 100);
|
||||
}
|
||||
|
||||
public void testWrappedFilters() throws IOException {
|
||||
|
||||
IntervalsSource source = Intervals.or(
|
||||
Intervals.term("nine"),
|
||||
Intervals.maxgaps(1, Intervals.or(
|
||||
Intervals.ordered(Intervals.term("pease"), Intervals.term("hot")),
|
||||
Intervals.ordered(Intervals.term("pease"), Intervals.term("cold")))));
|
||||
checkIntervals(source, "field1", 3, new int[][]{
|
||||
{},
|
||||
{ 0, 2, 3, 5, 11, 11, 28, 28 },
|
||||
{ 0, 2, 3, 5 },
|
||||
{},
|
||||
{ 0, 2, 3, 5, 11, 11 },
|
||||
{}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||
public class TestSimplifications extends LuceneTestCase {
|
||||
|
||||
public void testStringPhrases() {
|
||||
// BLOCK(term) => term
|
||||
IntervalsSource actual = Intervals.phrase("term");
|
||||
assertEquals(Intervals.term("term"), actual);
|
||||
}
|
||||
@ -32,18 +33,45 @@ public class TestSimplifications extends LuceneTestCase {
|
||||
}
|
||||
|
||||
public void testOrdered() {
|
||||
// ORDERED(term) => term
|
||||
IntervalsSource actual = Intervals.ordered(Intervals.term("term"));
|
||||
assertEquals(Intervals.term("term"), actual);
|
||||
}
|
||||
|
||||
public void testUnordered() {
|
||||
// UNORDERED(term) => term
|
||||
IntervalsSource actual = Intervals.unordered(Intervals.term("term"));
|
||||
assertEquals(Intervals.term("term"), actual);
|
||||
}
|
||||
|
||||
public void testUnorderedOverlaps() {
|
||||
IntervalsSource actual = Intervals.unordered(true, Intervals.term("term"));
|
||||
// UNORDERED_NO_OVERLAPS(term) => term
|
||||
IntervalsSource actual = Intervals.unordered(false, Intervals.term("term"));
|
||||
assertEquals(Intervals.term("term"), actual);
|
||||
}
|
||||
|
||||
public void testDisjunctionRemovesDuplicates() {
|
||||
// or(a, b, a) => or(a, b)
|
||||
IntervalsSource actual = Intervals.or(Intervals.term("a"), Intervals.term("b"), Intervals.term("a"));
|
||||
assertEquals(Intervals.or(Intervals.term("a"), Intervals.term("b")), actual);
|
||||
}
|
||||
|
||||
public void testPhraseSimplification() {
|
||||
// BLOCK(BLOCK(a, b), c) => BLOCK(a, b, c)
|
||||
IntervalsSource actual = Intervals.phrase(Intervals.phrase(Intervals.term("a"), Intervals.term("b")), Intervals.term("c"));
|
||||
assertEquals(Intervals.phrase(Intervals.term("a"), Intervals.term("b"), Intervals.term("c")), actual);
|
||||
|
||||
// BLOCK(a, BLOCK(b, BLOCK(c, d))) => BLOCK(a, b, c, d)
|
||||
actual = Intervals.phrase(Intervals.term("a"), Intervals.phrase(Intervals.term("b"),
|
||||
Intervals.phrase(Intervals.term("c"), Intervals.term("d"))));
|
||||
assertEquals(Intervals.phrase(Intervals.term("a"), Intervals.term("b"), Intervals.term("c"), Intervals.term("d")), actual);
|
||||
}
|
||||
|
||||
public void testDisjunctionSimplification() {
|
||||
// or(a, or(b, or(c, d))) => or(a, b, c, d)
|
||||
IntervalsSource actual = Intervals.or(Intervals.term("a"), Intervals.or(Intervals.term("b"),
|
||||
Intervals.or(Intervals.term("c"), Intervals.term("d"))));
|
||||
assertEquals(Intervals.or(Intervals.term("a"), Intervals.term("b"), Intervals.term("c"), Intervals.term("d")), actual);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user