mirror of https://github.com/apache/lucene.git
LUCENE-6373: complete two phase doc id iteration support for Spans
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1675776 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
558d85f892
commit
5105d036bd
|
@ -50,6 +50,10 @@ New Features
|
||||||
FilterSpans to just have an accept(Spans candidate) method for
|
FilterSpans to just have an accept(Spans candidate) method for
|
||||||
subclasses. (Robert Muir)
|
subclasses. (Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-6373: SpanOrQuery shares disjunction logic with boolean
|
||||||
|
queries, and supports two-phased iterators to avoid loading
|
||||||
|
positions when possible. (Paul Elschot via Robert Muir)
|
||||||
|
|
||||||
* LUCENE-6352: Added a new query time join to the join module that uses
|
* LUCENE-6352: Added a new query time join to the join module that uses
|
||||||
global ordinals, which is faster for subsequent joins between reopens.
|
global ordinals, which is faster for subsequent joins between reopens.
|
||||||
(Martijn van Groningen, Adrien Grand)
|
(Martijn van Groningen, Adrien Grand)
|
||||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.util.CollectionUtil;
|
import org.apache.lucene.util.CollectionUtil;
|
||||||
import org.apache.lucene.search.spans.Spans;
|
|
||||||
|
|
||||||
/** A conjunction of DocIdSetIterators.
|
/** A conjunction of DocIdSetIterators.
|
||||||
* This iterates over the doc ids that are present in each given DocIdSetIterator.
|
* This iterates over the doc ids that are present in each given DocIdSetIterator.
|
||||||
|
@ -35,20 +34,16 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
||||||
/** Create a conjunction over the provided iterators, taking advantage of
|
/** Create a conjunction over the provided iterators, taking advantage of
|
||||||
* {@link TwoPhaseIterator}. */
|
* {@link TwoPhaseIterator}. */
|
||||||
public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
|
public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
|
||||||
|
assert iterators.size() >= 2;
|
||||||
final List<DocIdSetIterator> allIterators = new ArrayList<>();
|
final List<DocIdSetIterator> allIterators = new ArrayList<>();
|
||||||
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
|
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
|
||||||
for (DocIdSetIterator iterator : iterators) {
|
for (DocIdSetIterator iter : iterators) {
|
||||||
TwoPhaseIterator twoPhaseIterator = null;
|
TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
|
||||||
if (iterator instanceof Scorer) {
|
if (twoPhaseIter != null) {
|
||||||
twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
|
allIterators.add(twoPhaseIter.approximation());
|
||||||
} else if (iterator instanceof Spans) {
|
twoPhaseIterators.add(twoPhaseIter);
|
||||||
twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
|
|
||||||
}
|
|
||||||
if (twoPhaseIterator != null) {
|
|
||||||
allIterators.add(twoPhaseIterator.approximation());
|
|
||||||
twoPhaseIterators.add(twoPhaseIterator);
|
|
||||||
} else { // no approximation support, use the iterator as-is
|
} else { // no approximation support, use the iterator as-is
|
||||||
allIterators.add(iterator);
|
allIterators.add(iter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +58,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
||||||
final DocIdSetIterator[] others;
|
final DocIdSetIterator[] others;
|
||||||
|
|
||||||
ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
|
ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
|
||||||
|
assert iterators.size() >= 2;
|
||||||
// Sort the array the first time to allow the least frequent DocsEnum to
|
// Sort the array the first time to allow the least frequent DocsEnum to
|
||||||
// lead the matching.
|
// lead the matching.
|
||||||
CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
|
CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
|
||||||
|
|
|
@ -23,37 +23,13 @@ import java.util.Iterator;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A priority queue of scorers that orders by current doc ID.
|
* A priority queue of DocIdSetIterators that orders by current doc ID.
|
||||||
* This specialization is needed over {@link PriorityQueue} because the
|
* This specialization is needed over {@link PriorityQueue} because the
|
||||||
* pluggable comparison function makes the rebalancing quite slow.
|
* pluggable comparison function makes the rebalancing quite slow.
|
||||||
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper> {
|
public final class DisiPriorityQueue<Iter extends DocIdSetIterator>
|
||||||
|
implements Iterable<DisiWrapper<Iter>> {
|
||||||
static class ScorerWrapper {
|
|
||||||
final Scorer scorer;
|
|
||||||
final long cost;
|
|
||||||
int doc; // the current doc, used for comparison
|
|
||||||
ScorerWrapper next; // reference to a next element, see #topList
|
|
||||||
|
|
||||||
// An approximation of the scorer, or the scorer itself if it does not
|
|
||||||
// support two-phase iteration
|
|
||||||
final DocIdSetIterator approximation;
|
|
||||||
// A two-phase view of the scorer, or null if the scorer does not support
|
|
||||||
// two-phase iteration
|
|
||||||
final TwoPhaseIterator twoPhaseView;
|
|
||||||
|
|
||||||
ScorerWrapper(Scorer scorer) {
|
|
||||||
this.scorer = scorer;
|
|
||||||
this.cost = scorer.cost();
|
|
||||||
this.doc = -1;
|
|
||||||
this.twoPhaseView = scorer.asTwoPhaseIterator();
|
|
||||||
if (twoPhaseView != null) {
|
|
||||||
approximation = twoPhaseView.approximation();
|
|
||||||
} else {
|
|
||||||
approximation = scorer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int leftNode(int node) {
|
static int leftNode(int node) {
|
||||||
return ((node + 1) << 1) - 1;
|
return ((node + 1) << 1) - 1;
|
||||||
|
@ -67,27 +43,27 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
||||||
return ((node + 1) >>> 1) - 1;
|
return ((node + 1) >>> 1) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final ScorerWrapper[] heap;
|
private final DisiWrapper<Iter>[] heap;
|
||||||
private int size;
|
private int size;
|
||||||
|
|
||||||
ScorerPriorityQueue(int maxSize) {
|
public DisiPriorityQueue(int maxSize) {
|
||||||
heap = new ScorerWrapper[maxSize];
|
heap = new DisiWrapper[maxSize];
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int size() {
|
public int size() {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScorerWrapper top() {
|
public DisiWrapper<Iter> top() {
|
||||||
return heap[0];
|
return heap[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Get the list of scorers which are on the current doc. */
|
/** Get the list of scorers which are on the current doc. */
|
||||||
ScorerWrapper topList() {
|
public DisiWrapper<Iter> topList() {
|
||||||
final ScorerWrapper[] heap = this.heap;
|
final DisiWrapper<Iter>[] heap = this.heap;
|
||||||
final int size = this.size;
|
final int size = this.size;
|
||||||
ScorerWrapper list = heap[0];
|
DisiWrapper<Iter> list = heap[0];
|
||||||
list.next = null;
|
list.next = null;
|
||||||
if (size >= 3) {
|
if (size >= 3) {
|
||||||
list = topList(list, heap, size, 1);
|
list = topList(list, heap, size, 1);
|
||||||
|
@ -98,14 +74,15 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
// prepend w1 (scorer) to w2 (list)
|
// prepend w1 (iterator) to w2 (list)
|
||||||
private static ScorerWrapper prepend(ScorerWrapper w1, ScorerWrapper w2) {
|
private DisiWrapper<Iter> prepend(DisiWrapper<Iter> w1, DisiWrapper<Iter> w2) {
|
||||||
w1.next = w2;
|
w1.next = w2;
|
||||||
return w1;
|
return w1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ScorerWrapper topList(ScorerWrapper list, ScorerWrapper[] heap, int size, int i) {
|
private DisiWrapper<Iter> topList(DisiWrapper<Iter> list, DisiWrapper<Iter>[] heap,
|
||||||
final ScorerWrapper w = heap[i];
|
int size, int i) {
|
||||||
|
final DisiWrapper<Iter> w = heap[i];
|
||||||
if (w.doc == list.doc) {
|
if (w.doc == list.doc) {
|
||||||
list = prepend(w, list);
|
list = prepend(w, list);
|
||||||
final int left = leftNode(i);
|
final int left = leftNode(i);
|
||||||
|
@ -120,37 +97,37 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScorerWrapper add(ScorerWrapper entry) {
|
public DisiWrapper<Iter> add(DisiWrapper<Iter> entry) {
|
||||||
final ScorerWrapper[] heap = this.heap;
|
final DisiWrapper<Iter>[] heap = this.heap;
|
||||||
final int size = this.size;
|
final int size = this.size;
|
||||||
heap[size] = entry;
|
heap[size] = entry;
|
||||||
upHeap(heap, size);
|
upHeap(size);
|
||||||
this.size = size + 1;
|
this.size = size + 1;
|
||||||
return heap[0];
|
return heap[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
ScorerWrapper pop() {
|
public DisiWrapper<Iter> pop() {
|
||||||
final ScorerWrapper[] heap = this.heap;
|
final DisiWrapper<Iter>[] heap = this.heap;
|
||||||
final ScorerWrapper result = heap[0];
|
final DisiWrapper<Iter> result = heap[0];
|
||||||
final int i = --size;
|
final int i = --size;
|
||||||
heap[0] = heap[i];
|
heap[0] = heap[i];
|
||||||
heap[i] = null;
|
heap[i] = null;
|
||||||
downHeap(heap, i);
|
downHeap(i);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScorerWrapper updateTop() {
|
public DisiWrapper<Iter> updateTop() {
|
||||||
downHeap(heap, size);
|
downHeap(size);
|
||||||
return heap[0];
|
return heap[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
ScorerWrapper updateTop(ScorerWrapper topReplacement) {
|
DisiWrapper<Iter> updateTop(DisiWrapper<Iter> topReplacement) {
|
||||||
heap[0] = topReplacement;
|
heap[0] = topReplacement;
|
||||||
return updateTop();
|
return updateTop();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void upHeap(ScorerWrapper[] heap, int i) {
|
void upHeap(int i) {
|
||||||
final ScorerWrapper node = heap[i];
|
final DisiWrapper<Iter> node = heap[i];
|
||||||
final int nodeDoc = node.doc;
|
final int nodeDoc = node.doc;
|
||||||
int j = parentNode(i);
|
int j = parentNode(i);
|
||||||
while (j >= 0 && nodeDoc < heap[j].doc) {
|
while (j >= 0 && nodeDoc < heap[j].doc) {
|
||||||
|
@ -161,9 +138,9 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
||||||
heap[i] = node;
|
heap[i] = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void downHeap(ScorerWrapper[] heap, int size) {
|
void downHeap(int size) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
final ScorerWrapper node = heap[0];
|
final DisiWrapper<Iter> node = heap[0];
|
||||||
int j = leftNode(i);
|
int j = leftNode(i);
|
||||||
if (j < size) {
|
if (j < size) {
|
||||||
int k = rightNode(j);
|
int k = rightNode(j);
|
||||||
|
@ -186,8 +163,10 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<ScorerWrapper> iterator() {
|
public Iterator<DisiWrapper<Iter>> iterator() {
|
||||||
return Arrays.asList(heap).subList(0, size).iterator();
|
return Arrays.asList(heap).subList(0, size).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper used in {@link DisiPriorityQueue}.
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public class DisiWrapper<Iter extends DocIdSetIterator> {
|
||||||
|
public final Iter iterator;
|
||||||
|
public final long cost;
|
||||||
|
public int doc; // the current doc, used for comparison
|
||||||
|
public DisiWrapper<Iter> next; // reference to a next element, see #topList
|
||||||
|
|
||||||
|
// An approximation of the iterator, or the iterator itself if it does not
|
||||||
|
// support two-phase iteration
|
||||||
|
public final DocIdSetIterator approximation;
|
||||||
|
// A two-phase view of the iterator, or null if the iterator does not support
|
||||||
|
// two-phase iteration
|
||||||
|
public final TwoPhaseIterator twoPhaseView;
|
||||||
|
|
||||||
|
public int lastApproxMatchDoc; // last doc of approximation that did match
|
||||||
|
public int lastApproxNonMatchDoc; // last doc of approximation that did not match
|
||||||
|
|
||||||
|
public DisiWrapper(Iter iterator) {
|
||||||
|
this.iterator = iterator;
|
||||||
|
this.cost = iterator.cost();
|
||||||
|
this.doc = -1;
|
||||||
|
this.twoPhaseView = TwoPhaseIterator.asTwoPhaseIterator(iterator);
|
||||||
|
|
||||||
|
if (twoPhaseView != null) {
|
||||||
|
approximation = twoPhaseView.approximation();
|
||||||
|
} else {
|
||||||
|
approximation = iterator;
|
||||||
|
}
|
||||||
|
this.lastApproxNonMatchDoc = -2;
|
||||||
|
this.lastApproxMatchDoc = -2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link DocIdSetIterator} which is a disjunction of the approximations of
|
||||||
|
* the provided iterators.
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public class DisjunctionDISIApproximation<Iter extends DocIdSetIterator>
|
||||||
|
extends DocIdSetIterator {
|
||||||
|
|
||||||
|
final DisiPriorityQueue<Iter> subIterators;
|
||||||
|
final long cost;
|
||||||
|
|
||||||
|
public DisjunctionDISIApproximation(DisiPriorityQueue<Iter> subIterators) {
|
||||||
|
this.subIterators = subIterators;
|
||||||
|
long cost = 0;
|
||||||
|
for (DisiWrapper<Iter> w : subIterators) {
|
||||||
|
cost += w.cost;
|
||||||
|
}
|
||||||
|
this.cost = cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return subIterators.top().doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
DisiWrapper<Iter> top = subIterators.top();
|
||||||
|
final int doc = top.doc;
|
||||||
|
do {
|
||||||
|
top.doc = top.approximation.nextDoc();
|
||||||
|
top = subIterators.updateTop();
|
||||||
|
} while (top.doc == doc);
|
||||||
|
|
||||||
|
return top.doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
DisiWrapper<Iter> top = subIterators.top();
|
||||||
|
do {
|
||||||
|
top.doc = top.approximation.advance(target);
|
||||||
|
top = subIterators.updateTop();
|
||||||
|
} while (top.doc < target);
|
||||||
|
|
||||||
|
return top.doc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,6 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the subquery scorers
|
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the subquery scorers
|
||||||
* is generated in document number order. The score for each document is the maximum of the scores computed
|
* is generated in document number order. The score for each document is the maximum of the scores computed
|
||||||
|
@ -48,11 +46,11 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected float score(ScorerWrapper topList) throws IOException {
|
protected float score(DisiWrapper<Scorer> topList) throws IOException {
|
||||||
float scoreSum = 0;
|
float scoreSum = 0;
|
||||||
float scoreMax = 0;
|
float scoreMax = 0;
|
||||||
for (ScorerWrapper w = topList; w != null; w = w.next) {
|
for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
|
||||||
final float subScore = w.scorer.score();
|
final float subScore = w.iterator.score();
|
||||||
scoreSum += subScore;
|
scoreSum += subScore;
|
||||||
if (subScore > scoreMax) {
|
if (subScore > scoreMax) {
|
||||||
scoreMax = subScore;
|
scoreMax = subScore;
|
||||||
|
|
|
@ -22,29 +22,27 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for Scorers that score disjunctions.
|
* Base class for Scorers that score disjunctions.
|
||||||
*/
|
*/
|
||||||
abstract class DisjunctionScorer extends Scorer {
|
abstract class DisjunctionScorer extends Scorer {
|
||||||
|
|
||||||
private final boolean needsScores;
|
private final boolean needsScores;
|
||||||
private final ScorerPriorityQueue subScorers;
|
private final DisiPriorityQueue<Scorer> subScorers;
|
||||||
private final long cost;
|
private final long cost;
|
||||||
|
|
||||||
/** Linked list of scorers which are on the current doc */
|
/** Linked list of scorers which are on the current doc */
|
||||||
private ScorerWrapper topScorers;
|
private DisiWrapper<Scorer> topScorers;
|
||||||
|
|
||||||
protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
|
protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
|
||||||
super(weight);
|
super(weight);
|
||||||
if (subScorers.size() <= 1) {
|
if (subScorers.size() <= 1) {
|
||||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||||
}
|
}
|
||||||
this.subScorers = new ScorerPriorityQueue(subScorers.size());
|
this.subScorers = new DisiPriorityQueue<Scorer>(subScorers.size());
|
||||||
long cost = 0;
|
long cost = 0;
|
||||||
for (Scorer scorer : subScorers) {
|
for (Scorer scorer : subScorers) {
|
||||||
final ScorerWrapper w = new ScorerWrapper(scorer);
|
final DisiWrapper<Scorer> w = new DisiWrapper<>(scorer);
|
||||||
cost += w.cost;
|
cost += w.cost;
|
||||||
this.subScorers.add(w);
|
this.subScorers.add(w);
|
||||||
}
|
}
|
||||||
|
@ -52,69 +50,17 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
this.needsScores = needsScores;
|
this.needsScores = needsScores;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* A {@link DocIdSetIterator} which is a disjunction of the approximations of
|
|
||||||
* the provided iterators.
|
|
||||||
*/
|
|
||||||
private static class DisjunctionDISIApproximation extends DocIdSetIterator {
|
|
||||||
|
|
||||||
final ScorerPriorityQueue subScorers;
|
|
||||||
final long cost;
|
|
||||||
|
|
||||||
DisjunctionDISIApproximation(ScorerPriorityQueue subScorers) {
|
|
||||||
this.subScorers = subScorers;
|
|
||||||
long cost = 0;
|
|
||||||
for (ScorerWrapper w : subScorers) {
|
|
||||||
cost += w.cost;
|
|
||||||
}
|
|
||||||
this.cost = cost;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return cost;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return subScorers.top().doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int nextDoc() throws IOException {
|
|
||||||
ScorerWrapper top = subScorers.top();
|
|
||||||
final int doc = top.doc;
|
|
||||||
do {
|
|
||||||
top.doc = top.approximation.nextDoc();
|
|
||||||
top = subScorers.updateTop();
|
|
||||||
} while (top.doc == doc);
|
|
||||||
|
|
||||||
return top.doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advance(int target) throws IOException {
|
|
||||||
ScorerWrapper top = subScorers.top();
|
|
||||||
do {
|
|
||||||
top.doc = top.approximation.advance(target);
|
|
||||||
top = subScorers.updateTop();
|
|
||||||
} while (top.doc < target);
|
|
||||||
|
|
||||||
return top.doc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
boolean hasApproximation = false;
|
boolean hasApproximation = false;
|
||||||
for (ScorerWrapper w : subScorers) {
|
for (DisiWrapper<Scorer> w : subScorers) {
|
||||||
if (w.twoPhaseView != null) {
|
if (w.twoPhaseView != null) {
|
||||||
hasApproximation = true;
|
hasApproximation = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasApproximation == false) {
|
if (! hasApproximation) {
|
||||||
// none of the sub scorers supports approximations
|
// none of the sub scorers supports approximations
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -122,13 +68,13 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
// note it is important to share the same pq as this scorer so that
|
// note it is important to share the same pq as this scorer so that
|
||||||
// rebalancing the pq through the approximation will also rebalance
|
// rebalancing the pq through the approximation will also rebalance
|
||||||
// the pq in this scorer.
|
// the pq in this scorer.
|
||||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
|
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Scorer>(subScorers)) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
ScorerWrapper topScorers = subScorers.topList();
|
DisiWrapper<Scorer> topScorers = subScorers.topList();
|
||||||
// remove the head of the list as long as it does not match
|
// remove the head of the list as long as it does not match
|
||||||
while (topScorers.twoPhaseView != null && topScorers.twoPhaseView.matches() == false) {
|
while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
|
||||||
topScorers = topScorers.next;
|
topScorers = topScorers.next;
|
||||||
if (topScorers == null) {
|
if (topScorers == null) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -138,9 +84,9 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
if (needsScores) {
|
if (needsScores) {
|
||||||
// if scores or freqs are needed, we also need to remove scorers
|
// if scores or freqs are needed, we also need to remove scorers
|
||||||
// from the top list that do not actually match
|
// from the top list that do not actually match
|
||||||
ScorerWrapper previous = topScorers;
|
DisiWrapper<Scorer> previous = topScorers;
|
||||||
for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
|
for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
|
||||||
if (w.twoPhaseView != null && w.twoPhaseView.matches() == false) {
|
if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
|
||||||
// w does not match, remove it
|
// w does not match, remove it
|
||||||
previous.next = w.next;
|
previous.next = w.next;
|
||||||
} else {
|
} else {
|
||||||
|
@ -175,10 +121,10 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
@Override
|
@Override
|
||||||
public final int nextDoc() throws IOException {
|
public final int nextDoc() throws IOException {
|
||||||
topScorers = null;
|
topScorers = null;
|
||||||
ScorerWrapper top = subScorers.top();
|
DisiWrapper<Scorer> top = subScorers.top();
|
||||||
final int doc = top.doc;
|
final int doc = top.doc;
|
||||||
do {
|
do {
|
||||||
top.doc = top.scorer.nextDoc();
|
top.doc = top.iterator.nextDoc();
|
||||||
top = subScorers.updateTop();
|
top = subScorers.updateTop();
|
||||||
} while (top.doc == doc);
|
} while (top.doc == doc);
|
||||||
|
|
||||||
|
@ -188,9 +134,9 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
@Override
|
@Override
|
||||||
public final int advance(int target) throws IOException {
|
public final int advance(int target) throws IOException {
|
||||||
topScorers = null;
|
topScorers = null;
|
||||||
ScorerWrapper top = subScorers.top();
|
DisiWrapper<Scorer> top = subScorers.top();
|
||||||
do {
|
do {
|
||||||
top.doc = top.scorer.advance(target);
|
top.doc = top.iterator.advance(target);
|
||||||
top = subScorers.updateTop();
|
top = subScorers.updateTop();
|
||||||
} while (top.doc < target);
|
} while (top.doc < target);
|
||||||
|
|
||||||
|
@ -203,7 +149,7 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
topScorers = subScorers.topList();
|
topScorers = subScorers.topList();
|
||||||
}
|
}
|
||||||
int freq = 1;
|
int freq = 1;
|
||||||
for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
|
for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
|
||||||
freq += 1;
|
freq += 1;
|
||||||
}
|
}
|
||||||
return freq;
|
return freq;
|
||||||
|
@ -218,13 +164,13 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Compute the score for the given linked list of scorers. */
|
/** Compute the score for the given linked list of scorers. */
|
||||||
protected abstract float score(ScorerWrapper topList) throws IOException;
|
protected abstract float score(DisiWrapper<Scorer> topList) throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Collection<ChildScorer> getChildren() {
|
public final Collection<ChildScorer> getChildren() {
|
||||||
ArrayList<ChildScorer> children = new ArrayList<>();
|
ArrayList<ChildScorer> children = new ArrayList<>();
|
||||||
for (ScorerWrapper scorer : subScorers) {
|
for (DisiWrapper<Scorer> scorer : subScorers) {
|
||||||
children.add(new ChildScorer(scorer.scorer, "SHOULD"));
|
children.add(new ChildScorer(scorer.iterator, "SHOULD"));
|
||||||
}
|
}
|
||||||
return children;
|
return children;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,6 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
|
||||||
|
|
||||||
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
||||||
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
||||||
*/
|
*/
|
||||||
|
@ -39,11 +37,11 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected float score(ScorerWrapper topList) throws IOException {
|
protected float score(DisiWrapper<Scorer> topList) throws IOException {
|
||||||
double score = 0;
|
double score = 0;
|
||||||
int freq = 0;
|
int freq = 0;
|
||||||
for (ScorerWrapper w = topList; w != null; w = w.next) {
|
for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
|
||||||
score += w.scorer.score();
|
score += w.iterator.score();
|
||||||
freq += 1;
|
freq += 1;
|
||||||
}
|
}
|
||||||
return (float)score * coord[freq];
|
return (float)score * coord[freq];
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.spans.Spans;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This abstract class defines methods to iterate over a set of non-decreasing
|
* This abstract class defines methods to iterate over a set of non-decreasing
|
||||||
* doc ids. Note that this class assumes it iterates on doc Ids, and therefore
|
* doc ids. Note that this class assumes it iterates on doc Ids, and therefore
|
||||||
|
@ -175,4 +177,5 @@ public abstract class DocIdSetIterator {
|
||||||
* completely inaccurate.
|
* completely inaccurate.
|
||||||
*/
|
*/
|
||||||
public abstract long cost();
|
public abstract long cost();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,12 +23,11 @@ import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
|
import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
|
||||||
import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
|
import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
|
||||||
import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;
|
import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link Scorer} for {@link BooleanQuery} when
|
* A {@link Scorer} for {@link BooleanQuery} when
|
||||||
|
@ -83,17 +82,17 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
|
|
||||||
// list of scorers which 'lead' the iteration and are currently
|
// list of scorers which 'lead' the iteration and are currently
|
||||||
// positioned on 'doc'
|
// positioned on 'doc'
|
||||||
ScorerWrapper lead;
|
DisiWrapper<Scorer> lead;
|
||||||
int doc; // current doc ID of the leads
|
int doc; // current doc ID of the leads
|
||||||
int freq; // number of scorers on the desired doc ID
|
int freq; // number of scorers on the desired doc ID
|
||||||
|
|
||||||
// priority queue of scorers that are too advanced compared to the current
|
// priority queue of scorers that are too advanced compared to the current
|
||||||
// doc. Ordered by doc ID.
|
// doc. Ordered by doc ID.
|
||||||
final ScorerPriorityQueue head;
|
final DisiPriorityQueue<Scorer> head;
|
||||||
|
|
||||||
// priority queue of scorers which are behind the current doc.
|
// priority queue of scorers which are behind the current doc.
|
||||||
// Ordered by cost.
|
// Ordered by cost.
|
||||||
final ScorerWrapper[] tail;
|
final DisiWrapper<Scorer>[] tail;
|
||||||
int tailSize;
|
int tailSize;
|
||||||
|
|
||||||
final Collection<ChildScorer> childScorers;
|
final Collection<ChildScorer> childScorers;
|
||||||
|
@ -113,13 +112,13 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
this.coord = coord;
|
this.coord = coord;
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
|
|
||||||
head = new ScorerPriorityQueue(scorers.size() - minShouldMatch + 1);
|
head = new DisiPriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1);
|
||||||
// there can be at most minShouldMatch - 1 scorers beyond the current position
|
// there can be at most minShouldMatch - 1 scorers beyond the current position
|
||||||
// otherwise we might be skipping over matching documents
|
// otherwise we might be skipping over matching documents
|
||||||
tail = new ScorerWrapper[minShouldMatch - 1];
|
tail = new DisiWrapper[minShouldMatch - 1];
|
||||||
|
|
||||||
for (Scorer scorer : scorers) {
|
for (Scorer scorer : scorers) {
|
||||||
addLead(new ScorerWrapper(scorer));
|
addLead(new DisiWrapper<Scorer>(scorer));
|
||||||
}
|
}
|
||||||
|
|
||||||
List<ChildScorer> children = new ArrayList<>();
|
List<ChildScorer> children = new ArrayList<>();
|
||||||
|
@ -145,13 +144,13 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
// We are moving to the next doc ID, so scorers in 'lead' need to go in
|
// We are moving to the next doc ID, so scorers in 'lead' need to go in
|
||||||
// 'tail'. If there is not enough space in 'tail', then we take the least
|
// 'tail'. If there is not enough space in 'tail', then we take the least
|
||||||
// costly scorers and advance them.
|
// costly scorers and advance them.
|
||||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||||
final ScorerWrapper evicted = insertTailWithOverFlow(s);
|
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
|
||||||
if (evicted != null) {
|
if (evicted != null) {
|
||||||
if (evicted.doc == doc) {
|
if (evicted.doc == doc) {
|
||||||
evicted.doc = evicted.scorer.nextDoc();
|
evicted.doc = evicted.iterator.nextDoc();
|
||||||
} else {
|
} else {
|
||||||
evicted.doc = evicted.scorer.advance(doc + 1);
|
evicted.doc = evicted.iterator.advance(doc + 1);
|
||||||
}
|
}
|
||||||
head.add(evicted);
|
head.add(evicted);
|
||||||
}
|
}
|
||||||
|
@ -164,23 +163,23 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
// Same logic as in nextDoc
|
// Same logic as in nextDoc
|
||||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||||
final ScorerWrapper evicted = insertTailWithOverFlow(s);
|
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
|
||||||
if (evicted != null) {
|
if (evicted != null) {
|
||||||
evicted.doc = evicted.scorer.advance(target);
|
evicted.doc = evicted.iterator.advance(target);
|
||||||
head.add(evicted);
|
head.add(evicted);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// But this time there might also be scorers in 'head' behind the desired
|
// But this time there might also be scorers in 'head' behind the desired
|
||||||
// target so we need to do the same thing that we did on 'lead' on 'head'
|
// target so we need to do the same thing that we did on 'lead' on 'head'
|
||||||
ScorerWrapper headTop = head.top();
|
DisiWrapper<Scorer> headTop = head.top();
|
||||||
while (headTop.doc < target) {
|
while (headTop.doc < target) {
|
||||||
final ScorerWrapper evicted = insertTailWithOverFlow(headTop);
|
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(headTop);
|
||||||
// We know that the tail is full since it contains at most
|
// We know that the tail is full since it contains at most
|
||||||
// minShouldMatch - 1 entries and we just moved at least minShouldMatch
|
// minShouldMatch - 1 entries and we just moved at least minShouldMatch
|
||||||
// entries to it, so evicted is not null
|
// entries to it, so evicted is not null
|
||||||
evicted.doc = evicted.scorer.advance(target);
|
evicted.doc = evicted.iterator.advance(target);
|
||||||
headTop = head.updateTop(evicted);
|
headTop = head.updateTop(evicted);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,20 +187,20 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
return doNext();
|
return doNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addLead(ScorerWrapper lead) {
|
private void addLead(DisiWrapper<Scorer> lead) {
|
||||||
lead.next = this.lead;
|
lead.next = this.lead;
|
||||||
this.lead = lead;
|
this.lead = lead;
|
||||||
freq += 1;
|
freq += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pushBackLeads() throws IOException {
|
private void pushBackLeads() throws IOException {
|
||||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||||
addTail(s);
|
addTail(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void advanceTail(ScorerWrapper top) throws IOException {
|
private void advanceTail(DisiWrapper<Scorer> top) throws IOException {
|
||||||
top.doc = top.scorer.advance(doc);
|
top.doc = top.iterator.advance(doc);
|
||||||
if (top.doc == doc) {
|
if (top.doc == doc) {
|
||||||
addLead(top);
|
addLead(top);
|
||||||
} else {
|
} else {
|
||||||
|
@ -210,7 +209,7 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void advanceTail() throws IOException {
|
private void advanceTail() throws IOException {
|
||||||
final ScorerWrapper top = popTail();
|
final DisiWrapper<Scorer> top = popTail();
|
||||||
advanceTail(top);
|
advanceTail(top);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -276,8 +275,8 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
// we need to know about all matches
|
// we need to know about all matches
|
||||||
updateFreq();
|
updateFreq();
|
||||||
double score = 0;
|
double score = 0;
|
||||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||||
score += s.scorer.score();
|
score += s.iterator.score();
|
||||||
}
|
}
|
||||||
return coord[freq] * (float) score;
|
return coord[freq] * (float) score;
|
||||||
}
|
}
|
||||||
|
@ -289,12 +288,12 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
||||||
private ScorerWrapper insertTailWithOverFlow(ScorerWrapper s) {
|
private DisiWrapper<Scorer> insertTailWithOverFlow(DisiWrapper<Scorer> s) {
|
||||||
if (tailSize < tail.length) {
|
if (tailSize < tail.length) {
|
||||||
addTail(s);
|
addTail(s);
|
||||||
return null;
|
return null;
|
||||||
} else if (tail.length >= 1) {
|
} else if (tail.length >= 1) {
|
||||||
final ScorerWrapper top = tail[0];
|
final DisiWrapper<Scorer> top = tail[0];
|
||||||
if (top.cost < s.cost) {
|
if (top.cost < s.cost) {
|
||||||
tail[0] = s;
|
tail[0] = s;
|
||||||
downHeapCost(tail, tailSize);
|
downHeapCost(tail, tailSize);
|
||||||
|
@ -305,16 +304,16 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Add an entry to 'tail'. Fails if over capacity. */
|
/** Add an entry to 'tail'. Fails if over capacity. */
|
||||||
private void addTail(ScorerWrapper s) {
|
private void addTail(DisiWrapper<Scorer> s) {
|
||||||
tail[tailSize] = s;
|
tail[tailSize] = s;
|
||||||
upHeapCost(tail, tailSize);
|
upHeapCost(tail, tailSize);
|
||||||
tailSize += 1;
|
tailSize += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Pop the least-costly scorer from 'tail'. */
|
/** Pop the least-costly scorer from 'tail'. */
|
||||||
private ScorerWrapper popTail() {
|
private DisiWrapper<Scorer> popTail() {
|
||||||
assert tailSize > 0;
|
assert tailSize > 0;
|
||||||
final ScorerWrapper result = tail[0];
|
final DisiWrapper<Scorer> result = tail[0];
|
||||||
tail[0] = tail[--tailSize];
|
tail[0] = tail[--tailSize];
|
||||||
downHeapCost(tail, tailSize);
|
downHeapCost(tail, tailSize);
|
||||||
return result;
|
return result;
|
||||||
|
@ -322,8 +321,8 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
|
|
||||||
/** Heap helpers */
|
/** Heap helpers */
|
||||||
|
|
||||||
private static void upHeapCost(ScorerWrapper[] heap, int i) {
|
private static void upHeapCost(DisiWrapper<Scorer>[] heap, int i) {
|
||||||
final ScorerWrapper node = heap[i];
|
final DisiWrapper<Scorer> node = heap[i];
|
||||||
final long nodeCost = node.cost;
|
final long nodeCost = node.cost;
|
||||||
int j = parentNode(i);
|
int j = parentNode(i);
|
||||||
while (j >= 0 && nodeCost < heap[j].cost) {
|
while (j >= 0 && nodeCost < heap[j].cost) {
|
||||||
|
@ -334,9 +333,9 @@ final class MinShouldMatchSumScorer extends Scorer {
|
||||||
heap[i] = node;
|
heap[i] = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void downHeapCost(ScorerWrapper[] heap, int size) {
|
private static void downHeapCost(DisiWrapper<Scorer>[] heap, int size) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
final ScorerWrapper node = heap[0];
|
final DisiWrapper<Scorer> node = heap[0];
|
||||||
int j = leftNode(i);
|
int j = leftNode(i);
|
||||||
if (j < size) {
|
if (j < size) {
|
||||||
int k = rightNode(j);
|
int k = rightNode(j);
|
||||||
|
|
|
@ -20,9 +20,13 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.spans.Spans;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
|
* Returned by {@link Scorer#asTwoPhaseIterator()}
|
||||||
* a {@link DocIdSetIterator}. When the {@link #approximation()}'s
|
* and {@link Spans#asTwoPhaseIterator()}
|
||||||
|
* to expose an approximation of a {@link DocIdSetIterator}.
|
||||||
|
* When the {@link #approximation()}'s
|
||||||
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
|
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
|
||||||
* return, {@link #matches()} needs to be checked in order to know whether the
|
* return, {@link #matches()} needs to be checked in order to know whether the
|
||||||
* returned doc ID actually matches.
|
* returned doc ID actually matches.
|
||||||
|
@ -89,4 +93,16 @@ public abstract class TwoPhaseIterator {
|
||||||
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
||||||
public abstract boolean matches() throws IOException;
|
public abstract boolean matches() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
|
||||||
|
* when available, otherwise returns null.
|
||||||
|
*/
|
||||||
|
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
|
||||||
|
return (iter instanceof Scorer)
|
||||||
|
? ((Scorer) iter).asTwoPhaseIterator()
|
||||||
|
: (iter instanceof Spans)
|
||||||
|
? ((Spans) iter).asTwoPhaseIterator()
|
||||||
|
: null;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,9 +31,13 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.DisiPriorityQueue;
|
||||||
|
import org.apache.lucene.search.DisiWrapper;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
|
import org.apache.lucene.search.DisjunctionDISIApproximation;
|
||||||
|
|
||||||
|
|
||||||
/** Matches the union of its clauses.
|
/** Matches the union of its clauses.
|
||||||
*/
|
*/
|
||||||
|
@ -146,35 +150,16 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private class SpanQueue extends PriorityQueue<Spans> {
|
|
||||||
public SpanQueue(int size) {
|
|
||||||
super(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected final boolean lessThan(Spans spans1, Spans spans2) {
|
|
||||||
if (spans1.docID() == spans2.docID()) {
|
|
||||||
if (spans1.startPosition() == spans2.startPosition()) {
|
|
||||||
return spans1.endPosition() < spans2.endPosition();
|
|
||||||
} else {
|
|
||||||
return spans1.startPosition() < spans2.startPosition();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return spans1.docID() < spans2.docID();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||||
|
|
||||||
for (SpanQuery seq : clauses) {
|
for (SpanQuery sq : clauses) {
|
||||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
Spans spans = sq.getSpans(context, acceptDocs, termContexts);
|
||||||
if (subSpan != null) {
|
if (spans != null) {
|
||||||
subSpans.add(subSpan);
|
subSpans.add(spans);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,114 +169,168 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||||
return subSpans.get(0);
|
return subSpans.get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
SpanQueue queue = new SpanQueue(clauses.size());
|
DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
|
||||||
for (Spans spans : subSpans) {
|
for (Spans spans : subSpans) {
|
||||||
queue.add(spans);
|
byDocQueue.add(new DisiWrapper<>(spans));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
|
||||||
|
|
||||||
return new Spans() {
|
return new Spans() {
|
||||||
|
Spans topPositionSpans = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
if (queue.size() == 0) { // all done
|
topPositionSpans = null;
|
||||||
return NO_MORE_DOCS;
|
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||||
}
|
int currentDoc = topDocSpans.doc;
|
||||||
|
|
||||||
int currentDoc = top().docID();
|
|
||||||
|
|
||||||
if (currentDoc == -1) { // initially
|
|
||||||
return advance(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
|
topDocSpans.doc = topDocSpans.iterator.nextDoc();
|
||||||
queue.updateTop();
|
topDocSpans = byDocQueue.updateTop();
|
||||||
} else {
|
} while (topDocSpans.doc == currentDoc);
|
||||||
queue.pop(); // exhausted a clause
|
return topDocSpans.doc;
|
||||||
if (queue.size() == 0) {
|
|
||||||
return NO_MORE_DOCS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// assert queue.size() > 0;
|
|
||||||
int doc = top().docID();
|
|
||||||
if (doc > currentDoc) {
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
} while (true);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Spans top() {
|
|
||||||
return queue.top();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
|
topPositionSpans = null;
|
||||||
while ((queue.size() > 0) && (top().docID() < target)) {
|
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||||
if (top().advance(target) != NO_MORE_DOCS) {
|
do {
|
||||||
queue.updateTop();
|
topDocSpans.doc = topDocSpans.iterator.advance(target);
|
||||||
} else {
|
topDocSpans = byDocQueue.updateTop();
|
||||||
queue.pop();
|
} while (topDocSpans.doc < target);
|
||||||
}
|
return topDocSpans.doc;
|
||||||
}
|
|
||||||
|
|
||||||
return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docID() {
|
public int docID() {
|
||||||
return (queue == null) ? -1
|
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||||
: (queue.size() > 0) ? top().docID()
|
return topDocSpans.doc;
|
||||||
: NO_MORE_DOCS;
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
|
boolean hasApproximation = false;
|
||||||
|
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||||
|
if (w.twoPhaseView != null) {
|
||||||
|
hasApproximation = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! hasApproximation) { // none of the sub spans supports approximations
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
return twoPhaseCurrentDocMatches();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
int lastDocTwoPhaseMatched = -1;
|
||||||
|
|
||||||
|
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||||
|
DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
|
||||||
|
// remove the head of the list as long as it does not match
|
||||||
|
final int currentDoc = listAtCurrentDoc.doc;
|
||||||
|
while (listAtCurrentDoc.twoPhaseView != null) {
|
||||||
|
if (listAtCurrentDoc.twoPhaseView.matches()) {
|
||||||
|
// use this spans for positions at current doc:
|
||||||
|
listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// do not use this spans for positions at current doc:
|
||||||
|
listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
|
||||||
|
listAtCurrentDoc = listAtCurrentDoc.next;
|
||||||
|
if (listAtCurrentDoc == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastDocTwoPhaseMatched = currentDoc;
|
||||||
|
topPositionSpans = null;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void fillPositionQueue() throws IOException { // called at first nextStartPosition
|
||||||
|
assert byPositionQueue.size() == 0;
|
||||||
|
// add all matching Spans at current doc to byPositionQueue
|
||||||
|
DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
|
||||||
|
while (listAtCurrentDoc != null) {
|
||||||
|
Spans spansAtDoc = listAtCurrentDoc.iterator;
|
||||||
|
if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDISIApproximation
|
||||||
|
if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
|
||||||
|
if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
|
||||||
|
spansAtDoc = null;
|
||||||
|
} else {
|
||||||
|
if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
|
||||||
|
if (! listAtCurrentDoc.twoPhaseView.matches()) {
|
||||||
|
spansAtDoc = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (spansAtDoc != null) {
|
||||||
|
assert spansAtDoc.docID() == listAtCurrentDoc.doc;
|
||||||
|
assert spansAtDoc.startPosition() == -1;
|
||||||
|
spansAtDoc.nextStartPosition();
|
||||||
|
assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
|
||||||
|
byPositionQueue.add(spansAtDoc);
|
||||||
|
}
|
||||||
|
listAtCurrentDoc = listAtCurrentDoc.next;
|
||||||
|
}
|
||||||
|
assert byPositionQueue.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextStartPosition() throws IOException {
|
public int nextStartPosition() throws IOException {
|
||||||
top().nextStartPosition();
|
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||||
queue.updateTop();
|
assert topDocSpans.doc != NO_MORE_DOCS;
|
||||||
int startPos = top().startPosition();
|
if (topPositionSpans == null) {
|
||||||
while (startPos == -1) { // initially at this doc
|
byPositionQueue.clear();
|
||||||
top().nextStartPosition();
|
fillPositionQueue(); // fills byPositionQueue at first position
|
||||||
queue.updateTop();
|
topPositionSpans = byPositionQueue.top();
|
||||||
startPos = top().startPosition();
|
} else {
|
||||||
|
topPositionSpans.nextStartPosition();
|
||||||
|
topPositionSpans = byPositionQueue.updateTop();
|
||||||
}
|
}
|
||||||
return startPos;
|
return topPositionSpans.startPosition();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int startPosition() {
|
public int startPosition() {
|
||||||
return top().startPosition();
|
return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int endPosition() {
|
public int endPosition() {
|
||||||
return top().endPosition();
|
return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<byte[]> getPayload() throws IOException {
|
public Collection<byte[]> getPayload() throws IOException {
|
||||||
ArrayList<byte[]> result = null;
|
return topPositionSpans == null
|
||||||
Spans theTop = top();
|
? null
|
||||||
if (theTop != null && theTop.isPayloadAvailable()) {
|
: topPositionSpans.isPayloadAvailable()
|
||||||
result = new ArrayList<>(theTop.getPayload());
|
? new ArrayList<>(topPositionSpans.getPayload())
|
||||||
}
|
: null;
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isPayloadAvailable() throws IOException {
|
public boolean isPayloadAvailable() throws IOException {
|
||||||
Spans top = top();
|
return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
|
||||||
return top != null && top.isPayloadAvailable();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "spans("+SpanOrQuery.this+")@"+
|
return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||||
((queue == null)?"START"
|
|
||||||
:(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private long cost = -1;
|
long cost = -1;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long cost() {
|
public long cost() {
|
||||||
|
@ -303,8 +342,8 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
|
class SpanPositionQueue extends PriorityQueue<Spans> {
|
||||||
|
SpanPositionQueue(int maxSize) {
|
||||||
|
super(maxSize, false); // do not prepopulate
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean lessThan(Spans s1, Spans s2) {
|
||||||
|
int start1 = s1.startPosition();
|
||||||
|
int start2 = s2.startPosition();
|
||||||
|
return (start1 < start2) ? true
|
||||||
|
: (start1 == start2) ? s1.endPosition() < s2.endPosition()
|
||||||
|
: false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -86,11 +86,12 @@ public abstract class Spans extends DocIdSetIterator {
|
||||||
*
|
*
|
||||||
* Note that the returned {@link TwoPhaseIterator}'s
|
* Note that the returned {@link TwoPhaseIterator}'s
|
||||||
* {@link TwoPhaseIterator#approximation() approximation} must
|
* {@link TwoPhaseIterator#approximation() approximation} must
|
||||||
* advance synchronously with this iterator: advancing the approximation must
|
* advance documents synchronously with this iterator:
|
||||||
|
* advancing the approximation must
|
||||||
* advance this iterator and vice-versa.
|
* advance this iterator and vice-versa.
|
||||||
*
|
*
|
||||||
* Implementing this method is typically useful on {@link Spans}s
|
* Implementing this method is typically useful on a {@link Spans}
|
||||||
* that have a high per-document overhead in order to confirm matches.
|
* that has a high per-document overhead for confirming matches.
|
||||||
*
|
*
|
||||||
* The default implementation returns {@code null}.
|
* The default implementation returns {@code null}.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -89,7 +89,7 @@ public abstract class PriorityQueue<T> {
|
||||||
* value (i.e., {@link #lessThan} should always favor the
|
* value (i.e., {@link #lessThan} should always favor the
|
||||||
* non-sentinel values).<br>
|
* non-sentinel values).<br>
|
||||||
*
|
*
|
||||||
* By default, this method returns false, which means the queue will not be
|
* By default, this method returns null, which means the queue will not be
|
||||||
* filled with sentinel values. Otherwise, the value returned will be used to
|
* filled with sentinel values. Otherwise, the value returned will be used to
|
||||||
* pre-populate the queue. Adds sentinel values to the queue.<br>
|
* pre-populate the queue. Adds sentinel values to the queue.<br>
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue