LUCENE-4514: make scorer.freq() well defined: number of matches in doc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/cleanup2878@1403702 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-30 13:56:23 +00:00
parent 2850821052
commit 38bf19cdcf
41 changed files with 82 additions and 71 deletions

View File

@ -127,7 +127,7 @@ final class BooleanScorer extends Scorer {
public int docID() { return doc; } public int docID() { return doc; }
@Override @Override
public float freq() { return freq; } public int freq() { return freq; }
@Override @Override
public int nextDoc() { return NO_MORE_DOCS; } public int nextDoc() { return NO_MORE_DOCS; }
@ -322,7 +322,7 @@ final class BooleanScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }

View File

@ -129,7 +129,7 @@ class BooleanScorer2 extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }
@ -313,7 +313,7 @@ class BooleanScorer2 extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return countingSumScorer.freq(); return countingSumScorer.freq();
} }

View File

@ -85,7 +85,7 @@ public abstract class CachingCollector extends Collector {
public final int docID() { return doc; } public final int docID() { return doc; }
@Override @Override
public final float freq() { throw new UnsupportedOperationException(); } public final int freq() { throw new UnsupportedOperationException(); }
@Override @Override
public final int nextDoc() { throw new UnsupportedOperationException(); } public final int nextDoc() { throw new UnsupportedOperationException(); }

View File

@ -138,7 +138,7 @@ class ConjunctionScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return scorers.length; return scorers.length;
} }

View File

@ -99,7 +99,7 @@ class ConjunctionTermScorer extends Scorer {
} }
@Override @Override
public float freq() { public int freq() {
return docsAndFreqs.length; return docsAndFreqs.length;
} }

View File

@ -195,7 +195,7 @@ public class ConstantScoreQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -100,7 +100,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
int doc = subScorers[0].docID(); int doc = subScorers[0].docID();
int size = numScorers; int size = numScorers;
return 1 + freq(1, size, doc) + freq(2, size, doc); return 1 + freq(1, size, doc) + freq(2, size, doc);

View File

@ -130,7 +130,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return nrMatchers; return nrMatchers;
} }

View File

@ -184,7 +184,7 @@ final class ExactPhraseScorer extends Scorer {
} }
@Override @Override
public float freq() { public int freq() {
return freq; return freq;
} }

View File

@ -204,7 +204,7 @@ public class FilteredQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { return scorer.freq(); } public int freq() throws IOException { return scorer.freq(); }
@Override @Override
public Collection<ChildScorer> getChildren() { public Collection<ChildScorer> getChildren() {
@ -298,7 +298,7 @@ public class FilteredQuery extends Query {
} }
@Override @Override
public final float freq() throws IOException { return scorer.freq(); } public final int freq() throws IOException { return scorer.freq(); }
@Override @Override
public final Collection<ChildScorer> getChildren() { public final Collection<ChildScorer> getChildren() {

View File

@ -758,7 +758,7 @@ public class IndexSearcher {
} }
@Override @Override
public float freq() { public int freq() {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }

View File

@ -68,7 +68,7 @@ public class MatchAllDocsQuery extends Query {
} }
@Override @Override
public float freq() { public int freq() {
return 1; return 1;
} }

View File

@ -261,7 +261,7 @@ public class MultiPhraseQuery extends Query {
if (scorer != null) { if (scorer != null) {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).freq;
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -303,7 +303,7 @@ public class PhraseQuery extends Query {
if (scorer != null) { if (scorer != null) {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = slop == 0 ? scorer.freq() : ((PhraseScorer)scorer).freq;
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -34,7 +34,7 @@ import org.apache.lucene.search.similarities.Similarity;
abstract class PhraseScorer extends Scorer { abstract class PhraseScorer extends Scorer {
PhrasePositions min, max; PhrasePositions min, max;
private float freq; //phrase frequency in current doc as computed by phraseFreq(). protected float freq; //phrase frequency in current doc as computed by phraseFreq().
final Similarity.SloppySimScorer docScorer; final Similarity.SloppySimScorer docScorer;
@ -110,14 +110,6 @@ abstract class PhraseScorer extends Scorer {
return max.doc; return max.doc;
} }
/**
* phrase frequency in current doc as computed by phraseFreq().
*/
@Override
public final float freq() {
return freq;
}
/** /**
* For a document containing all the phrase query terms, compute the * For a document containing all the phrase query terms, compute the
* frequency of the phrase in that document. * frequency of the phrase in that document.

View File

@ -105,7 +105,7 @@ class ReqExclScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return reqScorer.freq(); return reqScorer.freq();
} }

View File

@ -86,7 +86,7 @@ class ReqOptSumScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
// we might have deferred advance() // we might have deferred advance()
score(); score();
return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1; return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1;

View File

@ -61,7 +61,7 @@ public class ScoreCachingWrappingScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return scorer.freq(); return scorer.freq();
} }

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import org.apache.lucene.index.DocsEnum;
/** /**
* Expert: Common scoring functionality for different types of queries. * Expert: Common scoring functionality for different types of queries.
* *
@ -39,7 +41,7 @@ import java.util.Collections;
* TopScoreDocCollector}) will not properly collect hits * TopScoreDocCollector}) will not properly collect hits
* with these scores. * with these scores.
*/ */
public abstract class Scorer extends DocIdSetIterator { public abstract class Scorer extends DocsEnum {
/** the Scorer's parent Weight. in some cases this may be null */ /** the Scorer's parent Weight. in some cases this may be null */
// TODO can we clean this up? // TODO can we clean this up?
protected final Weight weight; protected final Weight weight;
@ -94,14 +96,6 @@ public abstract class Scorer extends DocIdSetIterator {
*/ */
public abstract float score() throws IOException; public abstract float score() throws IOException;
/** Returns number of matches for the current document.
* This returns a float (not int) because
* SloppyPhraseScorer discounts its freq according to how
* "sloppy" the match was.
*
* @lucene.experimental */
public abstract float freq() throws IOException;
/** returns parent Weight /** returns parent Weight
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -43,6 +43,8 @@ final class SloppyPhraseScorer extends PhraseScorer {
private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
private int numMatches;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) { int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer); super(weight, postings, docScorer);
@ -75,6 +77,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
return 0.0f; return 0.0f;
} }
float freq = 0.0f; float freq = 0.0f;
numMatches = 0;
PhrasePositions pp = pq.pop(); PhrasePositions pp = pq.pop();
int matchLength = end - pp.position; int matchLength = end - pp.position;
int next = pq.top().position; int next = pq.top().position;
@ -85,6 +88,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
if (pp.position > next) { // done minimizing current match-length if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) { if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match freq += docScorer.computeSlopFactor(matchLength); // score match
numMatches++;
} }
pq.add(pp); pq.add(pp);
pp = pq.pop(); pp = pq.pop();
@ -99,6 +103,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
} }
if (matchLength <= slop) { if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match freq += docScorer.computeSlopFactor(matchLength); // score match
numMatches++;
} }
return freq; return freq;
} }
@ -483,6 +488,11 @@ final class SloppyPhraseScorer extends PhraseScorer {
return tg; return tg;
} }
@Override
public int freq() throws IOException {
return numMatches;
}
// private void printQueue(PrintStream ps, PhrasePositions ext, String title) { // private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
// //if (min.doc != ?) return; // //if (min.doc != ?) return;
// ps.println(); // ps.println();
@ -504,4 +514,5 @@ final class SloppyPhraseScorer extends PhraseScorer {
// } // }
// } // }
} }

View File

@ -55,7 +55,7 @@ final class TermScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return docsEnum.freq(); return docsEnum.freq();
} }

View File

@ -103,12 +103,14 @@ public class PayloadTermQuery extends SpanTermQuery {
} }
doc = spans.doc(); doc = spans.doc();
freq = 0.0f; freq = 0.0f;
numMatches = 0;
payloadScore = 0; payloadScore = 0;
payloadsSeen = 0; payloadsSeen = 0;
while (more && doc == spans.doc()) { while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start(); int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength); freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity); processPayload(similarity);
more = spans.next();// this moves positions to the next match in this more = spans.next();// this moves positions to the next match in this
@ -179,7 +181,7 @@ public class PayloadTermQuery extends SpanTermQuery {
if (scorer != null) { if (scorer != null) {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation(); Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -33,6 +33,7 @@ public class SpanScorer extends Scorer {
protected int doc; protected int doc;
protected float freq; protected float freq;
protected int numMatches;
protected final Similarity.SloppySimScorer docScorer; protected final Similarity.SloppySimScorer docScorer;
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer) protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
@ -77,9 +78,11 @@ public class SpanScorer extends Scorer {
} }
doc = spans.doc(); doc = spans.doc();
freq = 0.0f; freq = 0.0f;
numMatches = 0;
do { do {
int matchLength = spans.end() - spans.start(); int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength); freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
more = spans.next(); more = spans.next();
} while (more && (doc == spans.doc())); } while (more && (doc == spans.doc()));
return true; return true;
@ -94,7 +97,14 @@ public class SpanScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {
return freq; return freq;
} }
} }

View File

@ -92,11 +92,11 @@ public class SpanWeight extends Weight {
@Override @Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException { public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs()); SpanScorer scorer = (SpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) { if (scorer != null) {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Norm; import org.apache.lucene.index.Norm;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
@ -199,6 +201,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
} }
static final class JustCompileQuery extends Query { static final class JustCompileQuery extends Query {
@ -227,7 +234,7 @@ final class JustCompileSearch {
} }
@Override @Override
public float freq() { public int freq() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }

View File

@ -81,7 +81,7 @@ public class TestBooleanScorer extends LuceneTestCase
Scorer[] scorers = new Scorer[] {new Scorer(weight) { Scorer[] scorers = new Scorer[] {new Scorer(weight) {
private int doc = -1; private int doc = -1;
@Override public float score() { return 0; } @Override public float score() { return 0; }
@Override public float freq() { return 0; } @Override public int freq() { return 0; }
@Override public int docID() { return doc; } @Override public int docID() { return doc; }
@Override public int nextDoc() { @Override public int nextDoc() {

View File

@ -36,7 +36,7 @@ public class TestCachingCollector extends LuceneTestCase {
public float score() throws IOException { return 0; } public float score() throws IOException { return 0; }
@Override @Override
public float freq() throws IOException { return 0; } public int freq() throws IOException { return 0; }
@Override @Override
public int docID() { return 0; } public int docID() { return 0; }

View File

@ -36,7 +36,7 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase {
return idx == scores.length ? Float.NaN : scores[idx]; return idx == scores.length ? Float.NaN : scores[idx];
} }
@Override public float freq() { @Override public int freq() {
return 1; return 1;
} }

View File

@ -44,7 +44,7 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase {
return idx == scores.length ? Float.NaN : scores[idx++]; return idx == scores.length ? Float.NaN : scores[idx++];
} }
@Override public float freq() throws IOException { @Override public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -99,7 +99,7 @@ public class BlockGroupingCollector extends Collector {
} }
@Override @Override
public float freq() { public int freq() {
throw new UnsupportedOperationException(); // TODO: wtf does this class do? throw new UnsupportedOperationException(); // TODO: wtf does this class do?
} }

View File

@ -219,7 +219,7 @@ class TermsIncludingScoreQuery extends Query {
} }
@Override @Override
public float freq() { public int freq() {
return 1; return 1;
} }
} }
@ -318,7 +318,7 @@ class TermsIncludingScoreQuery extends Query {
return scores[currentDoc]; return scores[currentDoc];
} }
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -169,7 +169,7 @@ public class ToChildBlockJoinQuery extends Query {
private final Bits acceptDocs; private final Bits acceptDocs;
private float parentScore; private float parentScore;
private float parentFreq = 1; private int parentFreq = 1;
private int childDoc = -1; private int childDoc = -1;
private int parentDoc; private int parentDoc;
@ -258,7 +258,7 @@ public class ToChildBlockJoinQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return parentFreq; return parentFreq;
} }

View File

@ -329,7 +329,7 @@ public class ToParentBlockJoinCollector extends Collector {
} }
@Override @Override
public float freq() { public int freq() {
return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw? return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw?
} }

View File

@ -218,7 +218,7 @@ public class ToParentBlockJoinQuery extends Query {
private int parentDoc = -1; private int parentDoc = -1;
private int prevParentDoc; private int prevParentDoc;
private float parentScore; private float parentScore;
private float parentFreq; private int parentFreq;
private int nextChildDoc; private int nextChildDoc;
private int[] pendingChildDocs = new int[5]; private int[] pendingChildDocs = new int[5];
@ -300,11 +300,10 @@ public class ToParentBlockJoinQuery extends Query {
} }
float totalScore = 0; float totalScore = 0;
float totalFreq = 0;
float maxScore = Float.NEGATIVE_INFINITY; float maxScore = Float.NEGATIVE_INFINITY;
float maxFreq = 0;
childDocUpto = 0; childDocUpto = 0;
parentFreq = 0;
do { do {
//System.out.println(" c=" + nextChildDoc); //System.out.println(" c=" + nextChildDoc);
@ -318,12 +317,11 @@ public class ToParentBlockJoinQuery extends Query {
if (scoreMode != ScoreMode.None) { if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode // TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score(); final float childScore = childScorer.score();
final float childFreq = childScorer.freq(); final int childFreq = childScorer.freq();
pendingChildScores[childDocUpto] = childScore; pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore); maxScore = Math.max(childScore, maxScore);
maxFreq = Math.max(childFreq, maxFreq);
totalScore += childScore; totalScore += childScore;
totalFreq += childFreq; parentFreq += childFreq;
} }
childDocUpto++; childDocUpto++;
nextChildDoc = childScorer.nextDoc(); nextChildDoc = childScorer.nextDoc();
@ -335,15 +333,12 @@ public class ToParentBlockJoinQuery extends Query {
switch(scoreMode) { switch(scoreMode) {
case Avg: case Avg:
parentScore = totalScore / childDocUpto; parentScore = totalScore / childDocUpto;
parentFreq = totalFreq / childDocUpto;
break; break;
case Max: case Max:
parentScore = maxScore; parentScore = maxScore;
parentFreq = maxFreq;
break; break;
case Total: case Total:
parentScore = totalScore; parentScore = totalScore;
parentFreq = totalFreq;
break; break;
case None: case None:
break; break;
@ -365,7 +360,7 @@ public class ToParentBlockJoinQuery extends Query {
} }
@Override @Override
public float freq() { public int freq() {
return parentFreq; return parentFreq;
} }

View File

@ -327,7 +327,7 @@ public class CustomScoreQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return subQueryScorer.freq(); return subQueryScorer.freq();
} }

View File

@ -167,7 +167,7 @@ public class BoostedQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return scorer.freq(); return scorer.freq();
} }

View File

@ -159,7 +159,7 @@ public class FunctionQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -88,7 +88,7 @@ public class ValueSourceScorer extends Scorer {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }
} }

View File

@ -482,7 +482,7 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -534,7 +534,7 @@ class JoinQuery extends Query {
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }

View File

@ -188,7 +188,7 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend
} }
@Override @Override
public float freq() throws IOException { public int freq() throws IOException {
return 1; return 1;
} }