mirror of https://github.com/apache/lucene.git
LUCENE-6373: complete two phase doc id iteration support for Spans
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1675776 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
558d85f892
commit
5105d036bd
|
@ -50,6 +50,10 @@ New Features
|
|||
FilterSpans to just have an accept(Spans candidate) method for
|
||||
subclasses. (Robert Muir)
|
||||
|
||||
* LUCENE-6373: SpanOrQuery shares disjunction logic with boolean
|
||||
queries, and supports two-phased iterators to avoid loading
|
||||
positions when possible. (Paul Elschot via Robert Muir)
|
||||
|
||||
* LUCENE-6352: Added a new query time join to the join module that uses
|
||||
global ordinals, which is faster for subsequent joins between reopens.
|
||||
(Martijn van Groningen, Adrien Grand)
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
/** A conjunction of DocIdSetIterators.
|
||||
* This iterates over the doc ids that are present in each given DocIdSetIterator.
|
||||
|
@ -35,20 +34,16 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
|||
/** Create a conjunction over the provided iterators, taking advantage of
|
||||
* {@link TwoPhaseIterator}. */
|
||||
public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
|
||||
assert iterators.size() >= 2;
|
||||
final List<DocIdSetIterator> allIterators = new ArrayList<>();
|
||||
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
|
||||
for (DocIdSetIterator iterator : iterators) {
|
||||
TwoPhaseIterator twoPhaseIterator = null;
|
||||
if (iterator instanceof Scorer) {
|
||||
twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
|
||||
} else if (iterator instanceof Spans) {
|
||||
twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
|
||||
}
|
||||
if (twoPhaseIterator != null) {
|
||||
allIterators.add(twoPhaseIterator.approximation());
|
||||
twoPhaseIterators.add(twoPhaseIterator);
|
||||
for (DocIdSetIterator iter : iterators) {
|
||||
TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
|
||||
if (twoPhaseIter != null) {
|
||||
allIterators.add(twoPhaseIter.approximation());
|
||||
twoPhaseIterators.add(twoPhaseIter);
|
||||
} else { // no approximation support, use the iterator as-is
|
||||
allIterators.add(iterator);
|
||||
allIterators.add(iter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -63,6 +58,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
|||
final DocIdSetIterator[] others;
|
||||
|
||||
ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
|
||||
assert iterators.size() >= 2;
|
||||
// Sort the array the first time to allow the least frequent DocsEnum to
|
||||
// lead the matching.
|
||||
CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
|
||||
|
|
|
@ -23,37 +23,13 @@ import java.util.Iterator;
|
|||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* A priority queue of scorers that orders by current doc ID.
|
||||
* A priority queue of DocIdSetIterators that orders by current doc ID.
|
||||
* This specialization is needed over {@link PriorityQueue} because the
|
||||
* pluggable comparison function makes the rebalancing quite slow.
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper> {
|
||||
|
||||
static class ScorerWrapper {
|
||||
final Scorer scorer;
|
||||
final long cost;
|
||||
int doc; // the current doc, used for comparison
|
||||
ScorerWrapper next; // reference to a next element, see #topList
|
||||
|
||||
// An approximation of the scorer, or the scorer itself if it does not
|
||||
// support two-phase iteration
|
||||
final DocIdSetIterator approximation;
|
||||
// A two-phase view of the scorer, or null if the scorer does not support
|
||||
// two-phase iteration
|
||||
final TwoPhaseIterator twoPhaseView;
|
||||
|
||||
ScorerWrapper(Scorer scorer) {
|
||||
this.scorer = scorer;
|
||||
this.cost = scorer.cost();
|
||||
this.doc = -1;
|
||||
this.twoPhaseView = scorer.asTwoPhaseIterator();
|
||||
if (twoPhaseView != null) {
|
||||
approximation = twoPhaseView.approximation();
|
||||
} else {
|
||||
approximation = scorer;
|
||||
}
|
||||
}
|
||||
}
|
||||
public final class DisiPriorityQueue<Iter extends DocIdSetIterator>
|
||||
implements Iterable<DisiWrapper<Iter>> {
|
||||
|
||||
static int leftNode(int node) {
|
||||
return ((node + 1) << 1) - 1;
|
||||
|
@ -67,27 +43,27 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
|||
return ((node + 1) >>> 1) - 1;
|
||||
}
|
||||
|
||||
private final ScorerWrapper[] heap;
|
||||
private final DisiWrapper<Iter>[] heap;
|
||||
private int size;
|
||||
|
||||
ScorerPriorityQueue(int maxSize) {
|
||||
heap = new ScorerWrapper[maxSize];
|
||||
public DisiPriorityQueue(int maxSize) {
|
||||
heap = new DisiWrapper[maxSize];
|
||||
size = 0;
|
||||
}
|
||||
|
||||
int size() {
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
ScorerWrapper top() {
|
||||
public DisiWrapper<Iter> top() {
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
/** Get the list of scorers which are on the current doc. */
|
||||
ScorerWrapper topList() {
|
||||
final ScorerWrapper[] heap = this.heap;
|
||||
public DisiWrapper<Iter> topList() {
|
||||
final DisiWrapper<Iter>[] heap = this.heap;
|
||||
final int size = this.size;
|
||||
ScorerWrapper list = heap[0];
|
||||
DisiWrapper<Iter> list = heap[0];
|
||||
list.next = null;
|
||||
if (size >= 3) {
|
||||
list = topList(list, heap, size, 1);
|
||||
|
@ -98,14 +74,15 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
|||
return list;
|
||||
}
|
||||
|
||||
// prepend w1 (scorer) to w2 (list)
|
||||
private static ScorerWrapper prepend(ScorerWrapper w1, ScorerWrapper w2) {
|
||||
// prepend w1 (iterator) to w2 (list)
|
||||
private DisiWrapper<Iter> prepend(DisiWrapper<Iter> w1, DisiWrapper<Iter> w2) {
|
||||
w1.next = w2;
|
||||
return w1;
|
||||
}
|
||||
|
||||
private static ScorerWrapper topList(ScorerWrapper list, ScorerWrapper[] heap, int size, int i) {
|
||||
final ScorerWrapper w = heap[i];
|
||||
private DisiWrapper<Iter> topList(DisiWrapper<Iter> list, DisiWrapper<Iter>[] heap,
|
||||
int size, int i) {
|
||||
final DisiWrapper<Iter> w = heap[i];
|
||||
if (w.doc == list.doc) {
|
||||
list = prepend(w, list);
|
||||
final int left = leftNode(i);
|
||||
|
@ -120,37 +97,37 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
|||
return list;
|
||||
}
|
||||
|
||||
ScorerWrapper add(ScorerWrapper entry) {
|
||||
final ScorerWrapper[] heap = this.heap;
|
||||
public DisiWrapper<Iter> add(DisiWrapper<Iter> entry) {
|
||||
final DisiWrapper<Iter>[] heap = this.heap;
|
||||
final int size = this.size;
|
||||
heap[size] = entry;
|
||||
upHeap(heap, size);
|
||||
upHeap(size);
|
||||
this.size = size + 1;
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
ScorerWrapper pop() {
|
||||
final ScorerWrapper[] heap = this.heap;
|
||||
final ScorerWrapper result = heap[0];
|
||||
public DisiWrapper<Iter> pop() {
|
||||
final DisiWrapper<Iter>[] heap = this.heap;
|
||||
final DisiWrapper<Iter> result = heap[0];
|
||||
final int i = --size;
|
||||
heap[0] = heap[i];
|
||||
heap[i] = null;
|
||||
downHeap(heap, i);
|
||||
downHeap(i);
|
||||
return result;
|
||||
}
|
||||
|
||||
ScorerWrapper updateTop() {
|
||||
downHeap(heap, size);
|
||||
public DisiWrapper<Iter> updateTop() {
|
||||
downHeap(size);
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
ScorerWrapper updateTop(ScorerWrapper topReplacement) {
|
||||
DisiWrapper<Iter> updateTop(DisiWrapper<Iter> topReplacement) {
|
||||
heap[0] = topReplacement;
|
||||
return updateTop();
|
||||
}
|
||||
|
||||
static void upHeap(ScorerWrapper[] heap, int i) {
|
||||
final ScorerWrapper node = heap[i];
|
||||
void upHeap(int i) {
|
||||
final DisiWrapper<Iter> node = heap[i];
|
||||
final int nodeDoc = node.doc;
|
||||
int j = parentNode(i);
|
||||
while (j >= 0 && nodeDoc < heap[j].doc) {
|
||||
|
@ -161,9 +138,9 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
|||
heap[i] = node;
|
||||
}
|
||||
|
||||
static void downHeap(ScorerWrapper[] heap, int size) {
|
||||
void downHeap(int size) {
|
||||
int i = 0;
|
||||
final ScorerWrapper node = heap[0];
|
||||
final DisiWrapper<Iter> node = heap[0];
|
||||
int j = leftNode(i);
|
||||
if (j < size) {
|
||||
int k = rightNode(j);
|
||||
|
@ -186,8 +163,10 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
|
|||
}
|
||||
|
||||
@Override
|
||||
public Iterator<ScorerWrapper> iterator() {
|
||||
public Iterator<DisiWrapper<Iter>> iterator() {
|
||||
return Arrays.asList(heap).subList(0, size).iterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Wrapper used in {@link DisiPriorityQueue}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class DisiWrapper<Iter extends DocIdSetIterator> {
|
||||
public final Iter iterator;
|
||||
public final long cost;
|
||||
public int doc; // the current doc, used for comparison
|
||||
public DisiWrapper<Iter> next; // reference to a next element, see #topList
|
||||
|
||||
// An approximation of the iterator, or the iterator itself if it does not
|
||||
// support two-phase iteration
|
||||
public final DocIdSetIterator approximation;
|
||||
// A two-phase view of the iterator, or null if the iterator does not support
|
||||
// two-phase iteration
|
||||
public final TwoPhaseIterator twoPhaseView;
|
||||
|
||||
public int lastApproxMatchDoc; // last doc of approximation that did match
|
||||
public int lastApproxNonMatchDoc; // last doc of approximation that did not match
|
||||
|
||||
public DisiWrapper(Iter iterator) {
|
||||
this.iterator = iterator;
|
||||
this.cost = iterator.cost();
|
||||
this.doc = -1;
|
||||
this.twoPhaseView = TwoPhaseIterator.asTwoPhaseIterator(iterator);
|
||||
|
||||
if (twoPhaseView != null) {
|
||||
approximation = twoPhaseView.approximation();
|
||||
} else {
|
||||
approximation = iterator;
|
||||
}
|
||||
this.lastApproxNonMatchDoc = -2;
|
||||
this.lastApproxMatchDoc = -2;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
package org.apache.lucene.search;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A {@link DocIdSetIterator} which is a disjunction of the approximations of
|
||||
* the provided iterators.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class DisjunctionDISIApproximation<Iter extends DocIdSetIterator>
|
||||
extends DocIdSetIterator {
|
||||
|
||||
final DisiPriorityQueue<Iter> subIterators;
|
||||
final long cost;
|
||||
|
||||
public DisjunctionDISIApproximation(DisiPriorityQueue<Iter> subIterators) {
|
||||
this.subIterators = subIterators;
|
||||
long cost = 0;
|
||||
for (DisiWrapper<Iter> w : subIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
this.cost = cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return subIterators.top().doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
DisiWrapper<Iter> top = subIterators.top();
|
||||
final int doc = top.doc;
|
||||
do {
|
||||
top.doc = top.approximation.nextDoc();
|
||||
top = subIterators.updateTop();
|
||||
} while (top.doc == doc);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
DisiWrapper<Iter> top = subIterators.top();
|
||||
do {
|
||||
top.doc = top.approximation.advance(target);
|
||||
top = subIterators.updateTop();
|
||||
} while (top.doc < target);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -19,8 +19,6 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
||||
|
||||
/**
|
||||
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers
|
||||
* is generated in document number order. The score for each document is the maximum of the scores computed
|
||||
|
@ -48,11 +46,11 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected float score(ScorerWrapper topList) throws IOException {
|
||||
protected float score(DisiWrapper<Scorer> topList) throws IOException {
|
||||
float scoreSum = 0;
|
||||
float scoreMax = 0;
|
||||
for (ScorerWrapper w = topList; w != null; w = w.next) {
|
||||
final float subScore = w.scorer.score();
|
||||
for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
|
||||
final float subScore = w.iterator.score();
|
||||
scoreSum += subScore;
|
||||
if (subScore > scoreMax) {
|
||||
scoreMax = subScore;
|
||||
|
|
|
@ -22,29 +22,27 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
||||
|
||||
/**
|
||||
* Base class for Scorers that score disjunctions.
|
||||
*/
|
||||
abstract class DisjunctionScorer extends Scorer {
|
||||
|
||||
private final boolean needsScores;
|
||||
private final ScorerPriorityQueue subScorers;
|
||||
private final DisiPriorityQueue<Scorer> subScorers;
|
||||
private final long cost;
|
||||
|
||||
/** Linked list of scorers which are on the current doc */
|
||||
private ScorerWrapper topScorers;
|
||||
private DisiWrapper<Scorer> topScorers;
|
||||
|
||||
protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
|
||||
super(weight);
|
||||
if (subScorers.size() <= 1) {
|
||||
throw new IllegalArgumentException("There must be at least 2 subScorers");
|
||||
}
|
||||
this.subScorers = new ScorerPriorityQueue(subScorers.size());
|
||||
this.subScorers = new DisiPriorityQueue<Scorer>(subScorers.size());
|
||||
long cost = 0;
|
||||
for (Scorer scorer : subScorers) {
|
||||
final ScorerWrapper w = new ScorerWrapper(scorer);
|
||||
final DisiWrapper<Scorer> w = new DisiWrapper<>(scorer);
|
||||
cost += w.cost;
|
||||
this.subScorers.add(w);
|
||||
}
|
||||
|
@ -52,69 +50,17 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
this.needsScores = needsScores;
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link DocIdSetIterator} which is a disjunction of the approximations of
|
||||
* the provided iterators.
|
||||
*/
|
||||
private static class DisjunctionDISIApproximation extends DocIdSetIterator {
|
||||
|
||||
final ScorerPriorityQueue subScorers;
|
||||
final long cost;
|
||||
|
||||
DisjunctionDISIApproximation(ScorerPriorityQueue subScorers) {
|
||||
this.subScorers = subScorers;
|
||||
long cost = 0;
|
||||
for (ScorerWrapper w : subScorers) {
|
||||
cost += w.cost;
|
||||
}
|
||||
this.cost = cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return subScorers.top().doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
ScorerWrapper top = subScorers.top();
|
||||
final int doc = top.doc;
|
||||
do {
|
||||
top.doc = top.approximation.nextDoc();
|
||||
top = subScorers.updateTop();
|
||||
} while (top.doc == doc);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
ScorerWrapper top = subScorers.top();
|
||||
do {
|
||||
top.doc = top.approximation.advance(target);
|
||||
top = subScorers.updateTop();
|
||||
} while (top.doc < target);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
boolean hasApproximation = false;
|
||||
for (ScorerWrapper w : subScorers) {
|
||||
for (DisiWrapper<Scorer> w : subScorers) {
|
||||
if (w.twoPhaseView != null) {
|
||||
hasApproximation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasApproximation == false) {
|
||||
if (! hasApproximation) {
|
||||
// none of the sub scorers supports approximations
|
||||
return null;
|
||||
}
|
||||
|
@ -122,13 +68,13 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
// note it is important to share the same pq as this scorer so that
|
||||
// rebalancing the pq through the approximation will also rebalance
|
||||
// the pq in this scorer.
|
||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
|
||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Scorer>(subScorers)) {
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
ScorerWrapper topScorers = subScorers.topList();
|
||||
DisiWrapper<Scorer> topScorers = subScorers.topList();
|
||||
// remove the head of the list as long as it does not match
|
||||
while (topScorers.twoPhaseView != null && topScorers.twoPhaseView.matches() == false) {
|
||||
while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
|
||||
topScorers = topScorers.next;
|
||||
if (topScorers == null) {
|
||||
return false;
|
||||
|
@ -138,9 +84,9 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
if (needsScores) {
|
||||
// if scores or freqs are needed, we also need to remove scorers
|
||||
// from the top list that do not actually match
|
||||
ScorerWrapper previous = topScorers;
|
||||
for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
|
||||
if (w.twoPhaseView != null && w.twoPhaseView.matches() == false) {
|
||||
DisiWrapper<Scorer> previous = topScorers;
|
||||
for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
|
||||
if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
|
||||
// w does not match, remove it
|
||||
previous.next = w.next;
|
||||
} else {
|
||||
|
@ -175,10 +121,10 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
@Override
|
||||
public final int nextDoc() throws IOException {
|
||||
topScorers = null;
|
||||
ScorerWrapper top = subScorers.top();
|
||||
DisiWrapper<Scorer> top = subScorers.top();
|
||||
final int doc = top.doc;
|
||||
do {
|
||||
top.doc = top.scorer.nextDoc();
|
||||
top.doc = top.iterator.nextDoc();
|
||||
top = subScorers.updateTop();
|
||||
} while (top.doc == doc);
|
||||
|
||||
|
@ -188,9 +134,9 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
@Override
|
||||
public final int advance(int target) throws IOException {
|
||||
topScorers = null;
|
||||
ScorerWrapper top = subScorers.top();
|
||||
DisiWrapper<Scorer> top = subScorers.top();
|
||||
do {
|
||||
top.doc = top.scorer.advance(target);
|
||||
top.doc = top.iterator.advance(target);
|
||||
top = subScorers.updateTop();
|
||||
} while (top.doc < target);
|
||||
|
||||
|
@ -203,7 +149,7 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
topScorers = subScorers.topList();
|
||||
}
|
||||
int freq = 1;
|
||||
for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
|
||||
for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
|
||||
freq += 1;
|
||||
}
|
||||
return freq;
|
||||
|
@ -218,13 +164,13 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** Compute the score for the given linked list of scorers. */
|
||||
protected abstract float score(ScorerWrapper topList) throws IOException;
|
||||
protected abstract float score(DisiWrapper<Scorer> topList) throws IOException;
|
||||
|
||||
@Override
|
||||
public final Collection<ChildScorer> getChildren() {
|
||||
ArrayList<ChildScorer> children = new ArrayList<>();
|
||||
for (ScorerWrapper scorer : subScorers) {
|
||||
children.add(new ChildScorer(scorer.scorer, "SHOULD"));
|
||||
for (DisiWrapper<Scorer> scorer : subScorers) {
|
||||
children.add(new ChildScorer(scorer.iterator, "SHOULD"));
|
||||
}
|
||||
return children;
|
||||
}
|
||||
|
|
|
@ -20,8 +20,6 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
||||
|
||||
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
|
||||
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
|
||||
*/
|
||||
|
@ -39,11 +37,11 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected float score(ScorerWrapper topList) throws IOException {
|
||||
protected float score(DisiWrapper<Scorer> topList) throws IOException {
|
||||
double score = 0;
|
||||
int freq = 0;
|
||||
for (ScorerWrapper w = topList; w != null; w = w.next) {
|
||||
score += w.scorer.score();
|
||||
for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
|
||||
score += w.iterator.score();
|
||||
freq += 1;
|
||||
}
|
||||
return (float)score * coord[freq];
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
/**
|
||||
* This abstract class defines methods to iterate over a set of non-decreasing
|
||||
* doc ids. Note that this class assumes it iterates on doc Ids, and therefore
|
||||
|
@ -175,4 +177,5 @@ public abstract class DocIdSetIterator {
|
|||
* completely inaccurate.
|
||||
*/
|
||||
public abstract long cost();
|
||||
|
||||
}
|
||||
|
|
|
@ -23,12 +23,11 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
|
||||
import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
|
||||
import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;
|
||||
import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
|
||||
import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
|
||||
import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
|
||||
|
||||
/**
|
||||
* A {@link Scorer} for {@link BooleanQuery} when
|
||||
|
@ -83,17 +82,17 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
|
||||
// list of scorers which 'lead' the iteration and are currently
|
||||
// positioned on 'doc'
|
||||
ScorerWrapper lead;
|
||||
DisiWrapper<Scorer> lead;
|
||||
int doc; // current doc ID of the leads
|
||||
int freq; // number of scorers on the desired doc ID
|
||||
|
||||
// priority queue of scorers that are too advanced compared to the current
|
||||
// doc. Ordered by doc ID.
|
||||
final ScorerPriorityQueue head;
|
||||
final DisiPriorityQueue<Scorer> head;
|
||||
|
||||
// priority queue of scorers which are behind the current doc.
|
||||
// Ordered by cost.
|
||||
final ScorerWrapper[] tail;
|
||||
final DisiWrapper<Scorer>[] tail;
|
||||
int tailSize;
|
||||
|
||||
final Collection<ChildScorer> childScorers;
|
||||
|
@ -113,13 +112,13 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
this.coord = coord;
|
||||
this.doc = -1;
|
||||
|
||||
head = new ScorerPriorityQueue(scorers.size() - minShouldMatch + 1);
|
||||
head = new DisiPriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1);
|
||||
// there can be at most minShouldMatch - 1 scorers beyond the current position
|
||||
// otherwise we might be skipping over matching documents
|
||||
tail = new ScorerWrapper[minShouldMatch - 1];
|
||||
tail = new DisiWrapper[minShouldMatch - 1];
|
||||
|
||||
for (Scorer scorer : scorers) {
|
||||
addLead(new ScorerWrapper(scorer));
|
||||
addLead(new DisiWrapper<Scorer>(scorer));
|
||||
}
|
||||
|
||||
List<ChildScorer> children = new ArrayList<>();
|
||||
|
@ -145,13 +144,13 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
// We are moving to the next doc ID, so scorers in 'lead' need to go in
|
||||
// 'tail'. If there is not enough space in 'tail', then we take the least
|
||||
// costly scorers and advance them.
|
||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
||||
final ScorerWrapper evicted = insertTailWithOverFlow(s);
|
||||
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
|
||||
if (evicted != null) {
|
||||
if (evicted.doc == doc) {
|
||||
evicted.doc = evicted.scorer.nextDoc();
|
||||
evicted.doc = evicted.iterator.nextDoc();
|
||||
} else {
|
||||
evicted.doc = evicted.scorer.advance(doc + 1);
|
||||
evicted.doc = evicted.iterator.advance(doc + 1);
|
||||
}
|
||||
head.add(evicted);
|
||||
}
|
||||
|
@ -164,23 +163,23 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
// Same logic as in nextDoc
|
||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
||||
final ScorerWrapper evicted = insertTailWithOverFlow(s);
|
||||
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
|
||||
if (evicted != null) {
|
||||
evicted.doc = evicted.scorer.advance(target);
|
||||
evicted.doc = evicted.iterator.advance(target);
|
||||
head.add(evicted);
|
||||
}
|
||||
}
|
||||
|
||||
// But this time there might also be scorers in 'head' behind the desired
|
||||
// target so we need to do the same thing that we did on 'lead' on 'head'
|
||||
ScorerWrapper headTop = head.top();
|
||||
DisiWrapper<Scorer> headTop = head.top();
|
||||
while (headTop.doc < target) {
|
||||
final ScorerWrapper evicted = insertTailWithOverFlow(headTop);
|
||||
final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(headTop);
|
||||
// We know that the tail is full since it contains at most
|
||||
// minShouldMatch - 1 entries and we just moved at least minShouldMatch
|
||||
// entries to it, so evicted is not null
|
||||
evicted.doc = evicted.scorer.advance(target);
|
||||
evicted.doc = evicted.iterator.advance(target);
|
||||
headTop = head.updateTop(evicted);
|
||||
}
|
||||
|
||||
|
@ -188,20 +187,20 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
return doNext();
|
||||
}
|
||||
|
||||
private void addLead(ScorerWrapper lead) {
|
||||
private void addLead(DisiWrapper<Scorer> lead) {
|
||||
lead.next = this.lead;
|
||||
this.lead = lead;
|
||||
freq += 1;
|
||||
}
|
||||
|
||||
private void pushBackLeads() throws IOException {
|
||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
||||
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||
addTail(s);
|
||||
}
|
||||
}
|
||||
|
||||
private void advanceTail(ScorerWrapper top) throws IOException {
|
||||
top.doc = top.scorer.advance(doc);
|
||||
private void advanceTail(DisiWrapper<Scorer> top) throws IOException {
|
||||
top.doc = top.iterator.advance(doc);
|
||||
if (top.doc == doc) {
|
||||
addLead(top);
|
||||
} else {
|
||||
|
@ -210,7 +209,7 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
}
|
||||
|
||||
private void advanceTail() throws IOException {
|
||||
final ScorerWrapper top = popTail();
|
||||
final DisiWrapper<Scorer> top = popTail();
|
||||
advanceTail(top);
|
||||
}
|
||||
|
||||
|
@ -276,8 +275,8 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
// we need to know about all matches
|
||||
updateFreq();
|
||||
double score = 0;
|
||||
for (ScorerWrapper s = lead; s != null; s = s.next) {
|
||||
score += s.scorer.score();
|
||||
for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
|
||||
score += s.iterator.score();
|
||||
}
|
||||
return coord[freq] * (float) score;
|
||||
}
|
||||
|
@ -289,12 +288,12 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
||||
private ScorerWrapper insertTailWithOverFlow(ScorerWrapper s) {
|
||||
private DisiWrapper<Scorer> insertTailWithOverFlow(DisiWrapper<Scorer> s) {
|
||||
if (tailSize < tail.length) {
|
||||
addTail(s);
|
||||
return null;
|
||||
} else if (tail.length >= 1) {
|
||||
final ScorerWrapper top = tail[0];
|
||||
final DisiWrapper<Scorer> top = tail[0];
|
||||
if (top.cost < s.cost) {
|
||||
tail[0] = s;
|
||||
downHeapCost(tail, tailSize);
|
||||
|
@ -305,16 +304,16 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** Add an entry to 'tail'. Fails if over capacity. */
|
||||
private void addTail(ScorerWrapper s) {
|
||||
private void addTail(DisiWrapper<Scorer> s) {
|
||||
tail[tailSize] = s;
|
||||
upHeapCost(tail, tailSize);
|
||||
tailSize += 1;
|
||||
}
|
||||
|
||||
/** Pop the least-costly scorer from 'tail'. */
|
||||
private ScorerWrapper popTail() {
|
||||
private DisiWrapper<Scorer> popTail() {
|
||||
assert tailSize > 0;
|
||||
final ScorerWrapper result = tail[0];
|
||||
final DisiWrapper<Scorer> result = tail[0];
|
||||
tail[0] = tail[--tailSize];
|
||||
downHeapCost(tail, tailSize);
|
||||
return result;
|
||||
|
@ -322,8 +321,8 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
|
||||
/** Heap helpers */
|
||||
|
||||
private static void upHeapCost(ScorerWrapper[] heap, int i) {
|
||||
final ScorerWrapper node = heap[i];
|
||||
private static void upHeapCost(DisiWrapper<Scorer>[] heap, int i) {
|
||||
final DisiWrapper<Scorer> node = heap[i];
|
||||
final long nodeCost = node.cost;
|
||||
int j = parentNode(i);
|
||||
while (j >= 0 && nodeCost < heap[j].cost) {
|
||||
|
@ -334,9 +333,9 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
heap[i] = node;
|
||||
}
|
||||
|
||||
private static void downHeapCost(ScorerWrapper[] heap, int size) {
|
||||
private static void downHeapCost(DisiWrapper<Scorer>[] heap, int size) {
|
||||
int i = 0;
|
||||
final ScorerWrapper node = heap[0];
|
||||
final DisiWrapper<Scorer> node = heap[0];
|
||||
int j = leftNode(i);
|
||||
if (j < size) {
|
||||
int k = rightNode(j);
|
||||
|
|
|
@ -20,9 +20,13 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
|
||||
/**
|
||||
* Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
|
||||
* a {@link DocIdSetIterator}. When the {@link #approximation()}'s
|
||||
* Returned by {@link Scorer#asTwoPhaseIterator()}
|
||||
* and {@link Spans#asTwoPhaseIterator()}
|
||||
* to expose an approximation of a {@link DocIdSetIterator}.
|
||||
* When the {@link #approximation()}'s
|
||||
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
|
||||
* return, {@link #matches()} needs to be checked in order to know whether the
|
||||
* returned doc ID actually matches.
|
||||
|
@ -89,4 +93,16 @@ public abstract class TwoPhaseIterator {
|
|||
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
||||
public abstract boolean matches() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
|
||||
* when available * otherwise returns null.
|
||||
*/
|
||||
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
|
||||
return (iter instanceof Scorer)
|
||||
? ((Scorer) iter).asTwoPhaseIterator()
|
||||
: (iter instanceof Spans)
|
||||
? ((Spans) iter).asTwoPhaseIterator()
|
||||
: null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -31,9 +31,13 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.DisiPriorityQueue;
|
||||
import org.apache.lucene.search.DisiWrapper;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.DisjunctionDISIApproximation;
|
||||
|
||||
|
||||
/** Matches the union of its clauses.
|
||||
*/
|
||||
|
@ -146,35 +150,16 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
|
||||
|
||||
private class SpanQueue extends PriorityQueue<Spans> {
|
||||
public SpanQueue(int size) {
|
||||
super(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final boolean lessThan(Spans spans1, Spans spans2) {
|
||||
if (spans1.docID() == spans2.docID()) {
|
||||
if (spans1.startPosition() == spans2.startPosition()) {
|
||||
return spans1.endPosition() < spans2.endPosition();
|
||||
} else {
|
||||
return spans1.startPosition() < spans2.startPosition();
|
||||
}
|
||||
} else {
|
||||
return spans1.docID() < spans2.docID();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
|
||||
throws IOException {
|
||||
|
||||
ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
|
||||
|
||||
for (SpanQuery seq : clauses) {
|
||||
Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
|
||||
if (subSpan != null) {
|
||||
subSpans.add(subSpan);
|
||||
for (SpanQuery sq : clauses) {
|
||||
Spans spans = sq.getSpans(context, acceptDocs, termContexts);
|
||||
if (spans != null) {
|
||||
subSpans.add(spans);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,114 +169,168 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
return subSpans.get(0);
|
||||
}
|
||||
|
||||
SpanQueue queue = new SpanQueue(clauses.size());
|
||||
DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
|
||||
for (Spans spans : subSpans) {
|
||||
queue.add(spans);
|
||||
byDocQueue.add(new DisiWrapper<>(spans));
|
||||
}
|
||||
|
||||
SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
|
||||
|
||||
return new Spans() {
|
||||
Spans topPositionSpans = null;
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (queue.size() == 0) { // all done
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
int currentDoc = top().docID();
|
||||
|
||||
if (currentDoc == -1) { // initially
|
||||
return advance(0);
|
||||
}
|
||||
|
||||
topPositionSpans = null;
|
||||
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||
int currentDoc = topDocSpans.doc;
|
||||
do {
|
||||
if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop(); // exhausted a clause
|
||||
if (queue.size() == 0) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
// assert queue.size() > 0;
|
||||
int doc = top().docID();
|
||||
if (doc > currentDoc) {
|
||||
return doc;
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
|
||||
private Spans top() {
|
||||
return queue.top();
|
||||
topDocSpans.doc = topDocSpans.iterator.nextDoc();
|
||||
topDocSpans = byDocQueue.updateTop();
|
||||
} while (topDocSpans.doc == currentDoc);
|
||||
return topDocSpans.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
|
||||
while ((queue.size() > 0) && (top().docID() < target)) {
|
||||
if (top().advance(target) != NO_MORE_DOCS) {
|
||||
queue.updateTop();
|
||||
} else {
|
||||
queue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
|
||||
topPositionSpans = null;
|
||||
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||
do {
|
||||
topDocSpans.doc = topDocSpans.iterator.advance(target);
|
||||
topDocSpans = byDocQueue.updateTop();
|
||||
} while (topDocSpans.doc < target);
|
||||
return topDocSpans.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return (queue == null) ? -1
|
||||
: (queue.size() > 0) ? top().docID()
|
||||
: NO_MORE_DOCS;
|
||||
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||
return topDocSpans.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
boolean hasApproximation = false;
|
||||
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||
if (w.twoPhaseView != null) {
|
||||
hasApproximation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (! hasApproximation) { // none of the sub spans supports approximations
|
||||
return null;
|
||||
}
|
||||
|
||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int lastDocTwoPhaseMatched = -1;
|
||||
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
|
||||
// remove the head of the list as long as it does not match
|
||||
final int currentDoc = listAtCurrentDoc.doc;
|
||||
while (listAtCurrentDoc.twoPhaseView != null) {
|
||||
if (listAtCurrentDoc.twoPhaseView.matches()) {
|
||||
// use this spans for positions at current doc:
|
||||
listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
|
||||
break;
|
||||
}
|
||||
// do not use this spans for positions at current doc:
|
||||
listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
|
||||
listAtCurrentDoc = listAtCurrentDoc.next;
|
||||
if (listAtCurrentDoc == null) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
lastDocTwoPhaseMatched = currentDoc;
|
||||
topPositionSpans = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
void fillPositionQueue() throws IOException { // called at first nextStartPosition
|
||||
assert byPositionQueue.size() == 0;
|
||||
// add all matching Spans at current doc to byPositionQueue
|
||||
DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
|
||||
while (listAtCurrentDoc != null) {
|
||||
Spans spansAtDoc = listAtCurrentDoc.iterator;
|
||||
if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
|
||||
if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
|
||||
if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
|
||||
spansAtDoc = null;
|
||||
} else {
|
||||
if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
|
||||
if (! listAtCurrentDoc.twoPhaseView.matches()) {
|
||||
spansAtDoc = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (spansAtDoc != null) {
|
||||
assert spansAtDoc.docID() == listAtCurrentDoc.doc;
|
||||
assert spansAtDoc.startPosition() == -1;
|
||||
spansAtDoc.nextStartPosition();
|
||||
assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
|
||||
byPositionQueue.add(spansAtDoc);
|
||||
}
|
||||
listAtCurrentDoc = listAtCurrentDoc.next;
|
||||
}
|
||||
assert byPositionQueue.size() > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextStartPosition() throws IOException {
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
int startPos = top().startPosition();
|
||||
while (startPos == -1) { // initially at this doc
|
||||
top().nextStartPosition();
|
||||
queue.updateTop();
|
||||
startPos = top().startPosition();
|
||||
DisiWrapper<Spans> topDocSpans = byDocQueue.top();
|
||||
assert topDocSpans.doc != NO_MORE_DOCS;
|
||||
if (topPositionSpans == null) {
|
||||
byPositionQueue.clear();
|
||||
fillPositionQueue(); // fills byPositionQueue at first position
|
||||
topPositionSpans = byPositionQueue.top();
|
||||
} else {
|
||||
topPositionSpans.nextStartPosition();
|
||||
topPositionSpans = byPositionQueue.updateTop();
|
||||
}
|
||||
return startPos;
|
||||
return topPositionSpans.startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return top().startPosition();
|
||||
return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return top().endPosition();
|
||||
return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<byte[]> getPayload() throws IOException {
|
||||
ArrayList<byte[]> result = null;
|
||||
Spans theTop = top();
|
||||
if (theTop != null && theTop.isPayloadAvailable()) {
|
||||
result = new ArrayList<>(theTop.getPayload());
|
||||
}
|
||||
return result;
|
||||
return topPositionSpans == null
|
||||
? null
|
||||
: topPositionSpans.isPayloadAvailable()
|
||||
? new ArrayList<>(topPositionSpans.getPayload())
|
||||
: null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPayloadAvailable() throws IOException {
|
||||
Spans top = top();
|
||||
return top != null && top.isPayloadAvailable();
|
||||
return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans("+SpanOrQuery.this+")@"+
|
||||
((queue == null)?"START"
|
||||
:(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
|
||||
return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
|
||||
}
|
||||
|
||||
private long cost = -1;
|
||||
long cost = -1;
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
|
@ -303,8 +342,8 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
|||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
class SpanPositionQueue extends PriorityQueue<Spans> {
|
||||
SpanPositionQueue(int maxSize) {
|
||||
super(maxSize, false); // do not prepopulate
|
||||
}
|
||||
|
||||
protected boolean lessThan(Spans s1, Spans s2) {
|
||||
int start1 = s1.startPosition();
|
||||
int start2 = s2.startPosition();
|
||||
return (start1 < start2) ? true
|
||||
: (start1 == start2) ? s1.endPosition() < s2.endPosition()
|
||||
: false;
|
||||
}
|
||||
}
|
||||
|
|
@ -86,11 +86,12 @@ public abstract class Spans extends DocIdSetIterator {
|
|||
*
|
||||
* Note that the returned {@link TwoPhaseIterator}'s
|
||||
* {@link TwoPhaseIterator#approximation() approximation} must
|
||||
* advance synchronously with this iterator: advancing the approximation must
|
||||
* advance documents synchronously with this iterator:
|
||||
* advancing the approximation must
|
||||
* advance this iterator and vice-versa.
|
||||
*
|
||||
* Implementing this method is typically useful on {@link Spans}s
|
||||
* that have a high per-document overhead in order to confirm matches.
|
||||
* Implementing this method is typically useful on a {@link Spans}
|
||||
* that has a high per-document overhead for confirming matches.
|
||||
*
|
||||
* The default implementation returns {@code null}.
|
||||
*/
|
||||
|
|
|
@ -89,7 +89,7 @@ public abstract class PriorityQueue<T> {
|
|||
* value (i.e., {@link #lessThan} should always favor the
|
||||
* non-sentinel values).<br>
|
||||
*
|
||||
* By default, this method returns false, which means the queue will not be
|
||||
* By default, this method returns null, which means the queue will not be
|
||||
* filled with sentinel values. Otherwise, the value returned will be used to
|
||||
* pre-populate the queue. Adds sentinel values to the queue.<br>
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue