LUCENE-6275: Use ConjunctionDISI in SloppyPhraseScorer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661727 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-02-23 17:55:29 +00:00
parent 0c078aaf4d
commit b03688ebee
4 changed files with 47 additions and 112 deletions

View File

@ -104,6 +104,10 @@ Optimizations
* LUCENE-6263: MultiCollector automatically caches scores when several
collectors need them. (Adrien Grand)
* LUCENE-6275: SloppyPhraseScorer now uses the same logic as ConjunctionScorer
in order to advance doc IDs, which takes advantage of the cost() API.
(Adrien Grand)
API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()

View File

@ -24,7 +24,6 @@ import org.apache.lucene.index.*;
* Position of a term in a document that takes into account the term offset within the phrase.
*/
final class PhrasePositions {
int doc; // current doc
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
@ -42,22 +41,6 @@ final class PhrasePositions {
this.terms = terms;
}
final boolean next() throws IOException { // increments to next doc
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
return true;
}
final boolean skipTo(int target) throws IOException {
doc = postings.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
return true;
}
final void firstPosition() throws IOException {
count = postings.freq(); // read first pos
nextPosition();
@ -80,7 +63,7 @@ final class PhrasePositions {
/** for debug purposes */
@Override
public String toString() {
String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
String s = "o:"+offset+" p:"+position+" c:"+count;
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}

View File

@ -26,20 +26,16 @@ final class PhraseQueue extends PriorityQueue<PhrasePositions> {
@Override
protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) {
if (pp1.doc == pp2.doc)
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
if (pp1.offset == pp2.offset) {
return pp1.ord < pp2.ord;
} else {
return pp1.offset < pp2.offset;
}
else {
return pp1.position < pp2.position;
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
if (pp1.offset == pp2.offset) {
return pp1.ord < pp2.ord;
} else {
return pp1.offset < pp2.offset;
}
else {
return pp1.doc < pp2.doc;
return pp1.position < pp2.position;
}
}
}

View File

@ -30,7 +30,9 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.FixedBitSet;
final class SloppyPhraseScorer extends Scorer {
private PhrasePositions min, max;
private final ConjunctionDISI conjunction;
private final PhrasePositions[] phrasePositions;
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
@ -49,7 +51,6 @@ final class SloppyPhraseScorer extends Scorer {
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
private int numMatches;
private final long cost;
final boolean needsScores;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
@ -60,25 +61,13 @@ final class SloppyPhraseScorer extends Scorer {
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
// min(cost)
cost = postings[0].postings.cost();
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;
}
max.next = min; // make it cyclic for easier manipulation
DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
phrasePositions = new PhrasePositions[postings.length];
for (int i = 0; i < postings.length; ++i) {
iterators[i] = postings[i].postings;
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
}
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
}
/**
@ -245,7 +234,7 @@ final class SloppyPhraseScorer extends Scorer {
//System.err.println("initSimple: doc: "+min.doc);
pq.clear();
// position pps and build queue from list
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (PhrasePositions pp : phrasePositions) {
pp.firstPosition();
if (pp.position > end) {
end = pp.position;
@ -267,7 +256,7 @@ final class SloppyPhraseScorer extends Scorer {
/** move all PPs to their first position */
private void placeFirstPositions() throws IOException {
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (PhrasePositions pp : phrasePositions) {
pp.firstPosition();
}
}
@ -275,7 +264,7 @@ final class SloppyPhraseScorer extends Scorer {
/** Fill the queue (all pps are already placed */
private void fillQueue() {
pq.clear();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (PhrasePositions pp : phrasePositions) { // iterate cyclic list: done once handled max
if (pp.position > end) {
end = pp.position;
}
@ -448,7 +437,7 @@ final class SloppyPhraseScorer extends Scorer {
private LinkedHashMap<Term,Integer> repeatingTerms() {
LinkedHashMap<Term,Integer> tord = new LinkedHashMap<>();
HashMap<Term,Integer> tcnt = new HashMap<>();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (PhrasePositions pp : phrasePositions) {
for (Term t : pp.terms) {
Integer cnt0 = tcnt.get(t);
Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
@ -464,7 +453,7 @@ final class SloppyPhraseScorer extends Scorer {
/** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
ArrayList<PhrasePositions> rp = new ArrayList<>();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (PhrasePositions pp : phrasePositions) {
for (Term t : pp.terms) {
if (rptTerms.containsKey(t)) {
rp.add(pp);
@ -553,55 +542,47 @@ final class SloppyPhraseScorer extends Scorer {
// }
// }
private boolean advanceMin(int target) throws IOException {
if (!min.skipTo(target)) {
max.doc = NO_MORE_DOCS; // for further calls to docID()
return false;
}
min = min.next; // cyclic
max = max.next; // cyclic
return true;
}
@Override
public int docID() {
return max.doc;
return conjunction.docID();
}
@Override
public int nextDoc() throws IOException {
return advance(max.doc + 1); // advance to the next doc after #docID()
int doc;
for (doc = conjunction.nextDoc(); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
sloppyFreq = phraseFreq(); // check for phrase
if (sloppyFreq != 0f) {
break;
}
}
return doc;
}
@Override
public float score() {
return docScorer.score(max.doc, sloppyFreq);
return docScorer.score(docID(), sloppyFreq);
}
@Override
public int advance(int target) throws IOException {
assert target > docID();
do {
if (!advanceMin(target)) {
return NO_MORE_DOCS;
}
while (min.doc < max.doc) {
if (!advanceMin(max.doc)) {
return NO_MORE_DOCS;
}
}
// found a doc with all of the terms
int doc;
for (doc = conjunction.advance(target); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
sloppyFreq = phraseFreq(); // check for phrase
target = min.doc + 1; // next target in case sloppyFreq is still 0
} while (sloppyFreq == 0f);
if (sloppyFreq != 0f) {
break;
}
}
// found a match
return max.doc;
return doc;
}
@Override
public long cost() {
return cost;
return conjunction.cost();
}
@Override
@ -612,36 +593,7 @@ final class SloppyPhraseScorer extends Scorer {
return new TwoPhaseDocIdSetIterator() {
@Override
public DocIdSetIterator approximation() {
return new DocIdSetIterator() {
@Override
public int docID() {
return SloppyPhraseScorer.this.docID();
}
@Override
public int nextDoc() throws IOException {
return advance(max.doc + 1);
}
@Override
public int advance(int target) throws IOException {
assert target > docID();
if (!advanceMin(target)) {
return NO_MORE_DOCS;
}
while (min.doc < max.doc) {
if (!advanceMin(max.doc)) {
return NO_MORE_DOCS;
}
}
return max.doc;
}
@Override
public long cost() {
return SloppyPhraseScorer.this.cost();
}
};
return conjunction;
}
@Override