mirror of https://github.com/apache/lucene.git
LUCENE-6275: Use ConjunctionDISI in SloppyPhraseScorer.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661727 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0c078aaf4d
commit
b03688ebee
|
@ -104,6 +104,10 @@ Optimizations
|
|||
* LUCENE-6263: MultiCollector automatically caches scores when several
|
||||
collectors need them. (Adrien Grand)
|
||||
|
||||
* LUCENE-6275: SloppyPhraseScorer now uses the same logic as ConjunctionScorer
|
||||
in order to advance doc IDs, which takes advantage of the cost() API.
|
||||
(Adrien Grand)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.apache.lucene.index.*;
|
|||
* Position of a term in a document that takes into account the term offset within the phrase.
|
||||
*/
|
||||
final class PhrasePositions {
|
||||
int doc; // current doc
|
||||
int position; // position in doc
|
||||
int count; // remaining pos in this doc
|
||||
int offset; // position in phrase
|
||||
|
@ -42,22 +41,6 @@ final class PhrasePositions {
|
|||
this.terms = terms;
|
||||
}
|
||||
|
||||
final boolean next() throws IOException { // increments to next doc
|
||||
doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
final boolean skipTo(int target) throws IOException {
|
||||
doc = postings.advance(target);
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
final void firstPosition() throws IOException {
|
||||
count = postings.freq(); // read first pos
|
||||
nextPosition();
|
||||
|
@ -80,7 +63,7 @@ final class PhrasePositions {
|
|||
/** for debug purposes */
|
||||
@Override
|
||||
public String toString() {
|
||||
String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
|
||||
String s = "o:"+offset+" p:"+position+" c:"+count;
|
||||
if (rptGroup >=0 ) {
|
||||
s += " rpt:"+rptGroup+",i"+rptInd;
|
||||
}
|
||||
|
|
|
@ -26,20 +26,16 @@ final class PhraseQueue extends PriorityQueue<PhrasePositions> {
|
|||
|
||||
@Override
|
||||
protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) {
|
||||
if (pp1.doc == pp2.doc)
|
||||
if (pp1.position == pp2.position)
|
||||
// same doc and pp.position, so decide by actual term positions.
|
||||
// rely on: pp.position == tp.position - offset.
|
||||
if (pp1.offset == pp2.offset) {
|
||||
return pp1.ord < pp2.ord;
|
||||
} else {
|
||||
return pp1.offset < pp2.offset;
|
||||
}
|
||||
else {
|
||||
return pp1.position < pp2.position;
|
||||
if (pp1.position == pp2.position)
|
||||
// same doc and pp.position, so decide by actual term positions.
|
||||
// rely on: pp.position == tp.position - offset.
|
||||
if (pp1.offset == pp2.offset) {
|
||||
return pp1.ord < pp2.ord;
|
||||
} else {
|
||||
return pp1.offset < pp2.offset;
|
||||
}
|
||||
else {
|
||||
return pp1.doc < pp2.doc;
|
||||
return pp1.position < pp2.position;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,9 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
final class SloppyPhraseScorer extends Scorer {
|
||||
private PhrasePositions min, max;
|
||||
|
||||
private final ConjunctionDISI conjunction;
|
||||
private final PhrasePositions[] phrasePositions;
|
||||
|
||||
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
|
||||
|
||||
|
@ -49,7 +51,6 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
|
||||
|
||||
private int numMatches;
|
||||
private final long cost;
|
||||
final boolean needsScores;
|
||||
|
||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
|
@ -60,25 +61,13 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
this.slop = slop;
|
||||
this.numPostings = postings==null ? 0 : postings.length;
|
||||
pq = new PhraseQueue(postings.length);
|
||||
// min(cost)
|
||||
cost = postings[0].postings.cost();
|
||||
// convert tps to a list of phrase positions.
|
||||
// note: phrase-position differs from term-position in that its position
|
||||
// reflects the phrase offset: pp.pos = tp.pos - offset.
|
||||
// this allows to easily identify a matching (exact) phrase
|
||||
// when all PhrasePositions have exactly the same position.
|
||||
if (postings.length > 0) {
|
||||
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
|
||||
max = min;
|
||||
max.doc = -1;
|
||||
for (int i = 1; i < postings.length; i++) {
|
||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||
max.next = pp;
|
||||
max = pp;
|
||||
max.doc = -1;
|
||||
}
|
||||
max.next = min; // make it cyclic for easier manipulation
|
||||
DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
|
||||
phrasePositions = new PhrasePositions[postings.length];
|
||||
for (int i = 0; i < postings.length; ++i) {
|
||||
iterators[i] = postings[i].postings;
|
||||
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||
}
|
||||
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -245,7 +234,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
//System.err.println("initSimple: doc: "+min.doc);
|
||||
pq.clear();
|
||||
// position pps and build queue from list
|
||||
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
|
||||
for (PhrasePositions pp : phrasePositions) {
|
||||
pp.firstPosition();
|
||||
if (pp.position > end) {
|
||||
end = pp.position;
|
||||
|
@ -267,7 +256,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
|
||||
/** move all PPs to their first position */
|
||||
private void placeFirstPositions() throws IOException {
|
||||
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
|
||||
for (PhrasePositions pp : phrasePositions) {
|
||||
pp.firstPosition();
|
||||
}
|
||||
}
|
||||
|
@ -275,7 +264,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
/** Fill the queue (all pps are already placed */
|
||||
private void fillQueue() {
|
||||
pq.clear();
|
||||
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
|
||||
for (PhrasePositions pp : phrasePositions) { // iterate cyclic list: done once handled max
|
||||
if (pp.position > end) {
|
||||
end = pp.position;
|
||||
}
|
||||
|
@ -448,7 +437,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
private LinkedHashMap<Term,Integer> repeatingTerms() {
|
||||
LinkedHashMap<Term,Integer> tord = new LinkedHashMap<>();
|
||||
HashMap<Term,Integer> tcnt = new HashMap<>();
|
||||
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
|
||||
for (PhrasePositions pp : phrasePositions) {
|
||||
for (Term t : pp.terms) {
|
||||
Integer cnt0 = tcnt.get(t);
|
||||
Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
|
||||
|
@ -464,7 +453,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
/** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
|
||||
private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
|
||||
ArrayList<PhrasePositions> rp = new ArrayList<>();
|
||||
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
|
||||
for (PhrasePositions pp : phrasePositions) {
|
||||
for (Term t : pp.terms) {
|
||||
if (rptTerms.containsKey(t)) {
|
||||
rp.add(pp);
|
||||
|
@ -553,55 +542,47 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
// }
|
||||
// }
|
||||
|
||||
private boolean advanceMin(int target) throws IOException {
|
||||
if (!min.skipTo(target)) {
|
||||
max.doc = NO_MORE_DOCS; // for further calls to docID()
|
||||
return false;
|
||||
}
|
||||
min = min.next; // cyclic
|
||||
max = max.next; // cyclic
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return max.doc;
|
||||
return conjunction.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(max.doc + 1); // advance to the next doc after #docID()
|
||||
int doc;
|
||||
for (doc = conjunction.nextDoc(); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
|
||||
sloppyFreq = phraseFreq(); // check for phrase
|
||||
if (sloppyFreq != 0f) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
return docScorer.score(max.doc, sloppyFreq);
|
||||
return docScorer.score(docID(), sloppyFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > docID();
|
||||
do {
|
||||
if (!advanceMin(target)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
while (min.doc < max.doc) {
|
||||
if (!advanceMin(max.doc)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
// found a doc with all of the terms
|
||||
int doc;
|
||||
for (doc = conjunction.advance(target); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
|
||||
sloppyFreq = phraseFreq(); // check for phrase
|
||||
target = min.doc + 1; // next target in case sloppyFreq is still 0
|
||||
} while (sloppyFreq == 0f);
|
||||
if (sloppyFreq != 0f) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// found a match
|
||||
return max.doc;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
return conjunction.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -612,36 +593,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
return new TwoPhaseDocIdSetIterator() {
|
||||
@Override
|
||||
public DocIdSetIterator approximation() {
|
||||
return new DocIdSetIterator() {
|
||||
@Override
|
||||
public int docID() {
|
||||
return SloppyPhraseScorer.this.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(max.doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > docID();
|
||||
if (!advanceMin(target)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
while (min.doc < max.doc) {
|
||||
if (!advanceMin(max.doc)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
return max.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return SloppyPhraseScorer.this.cost();
|
||||
}
|
||||
};
|
||||
return conjunction;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue