LUCENE-4514: make scorer.freq() well defined: number of matches in doc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/cleanup2878@1403702 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-30 13:56:23 +00:00
parent 2850821052
commit 38bf19cdcf
41 changed files with 82 additions and 71 deletions

View File

@ -127,7 +127,7 @@ final class BooleanScorer extends Scorer {
public int docID() { return doc; }
@Override
public float freq() { return freq; }
public int freq() { return freq; }
@Override
public int nextDoc() { return NO_MORE_DOCS; }
@ -322,7 +322,7 @@ final class BooleanScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -129,7 +129,7 @@ class BooleanScorer2 extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}
@ -313,7 +313,7 @@ class BooleanScorer2 extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return countingSumScorer.freq();
}

View File

@ -85,7 +85,7 @@ public abstract class CachingCollector extends Collector {
public final int docID() { return doc; }
@Override
public final float freq() { throw new UnsupportedOperationException(); }
public final int freq() { throw new UnsupportedOperationException(); }
@Override
public final int nextDoc() { throw new UnsupportedOperationException(); }

View File

@ -138,7 +138,7 @@ class ConjunctionScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return scorers.length;
}

View File

@ -99,7 +99,7 @@ class ConjunctionTermScorer extends Scorer {
}
@Override
public float freq() {
public int freq() {
return docsAndFreqs.length;
}

View File

@ -195,7 +195,7 @@ public class ConstantScoreQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}

View File

@ -100,7 +100,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
int doc = subScorers[0].docID();
int size = numScorers;
return 1 + freq(1, size, doc) + freq(2, size, doc);

View File

@ -130,7 +130,7 @@ class DisjunctionSumScorer extends DisjunctionScorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return nrMatchers;
}

View File

@ -184,7 +184,7 @@ final class ExactPhraseScorer extends Scorer {
}
@Override
public float freq() {
public int freq() {
return freq;
}

View File

@ -204,7 +204,7 @@ public class FilteredQuery extends Query {
}
@Override
public float freq() throws IOException { return scorer.freq(); }
public int freq() throws IOException { return scorer.freq(); }
@Override
public Collection<ChildScorer> getChildren() {
@ -298,7 +298,7 @@ public class FilteredQuery extends Query {
}
@Override
public final float freq() throws IOException { return scorer.freq(); }
public final int freq() throws IOException { return scorer.freq(); }
@Override
public final Collection<ChildScorer> getChildren() {

View File

@ -758,7 +758,7 @@ public class IndexSearcher {
}
@Override
public float freq() {
public int freq() {
throw new UnsupportedOperationException();
}

View File

@ -68,7 +68,7 @@ public class MatchAllDocsQuery extends Query {
}
@Override
public float freq() {
public int freq() {
return 1;
}

View File

@ -261,7 +261,7 @@ public class MultiPhraseQuery extends Query {
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).freq;
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -303,7 +303,7 @@ public class PhraseQuery extends Query {
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
float freq = slop == 0 ? scorer.freq() : ((PhraseScorer)scorer).freq;
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -34,7 +34,7 @@ import org.apache.lucene.search.similarities.Similarity;
abstract class PhraseScorer extends Scorer {
PhrasePositions min, max;
private float freq; //phrase frequency in current doc as computed by phraseFreq().
protected float freq; //phrase frequency in current doc as computed by phraseFreq().
final Similarity.SloppySimScorer docScorer;
@ -110,14 +110,6 @@ abstract class PhraseScorer extends Scorer {
return max.doc;
}
/**
* phrase frequency in current doc as computed by phraseFreq().
*/
@Override
public final float freq() {
return freq;
}
/**
* For a document containing all the phrase query terms, compute the
* frequency of the phrase in that document.

View File

@ -105,7 +105,7 @@ class ReqExclScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return reqScorer.freq();
}

View File

@ -86,7 +86,7 @@ class ReqOptSumScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
// we might have deferred advance()
score();
return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1;

View File

@ -61,7 +61,7 @@ public class ScoreCachingWrappingScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return scorer.freq();
}

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.DocsEnum;
/**
* Expert: Common scoring functionality for different types of queries.
*
@ -39,7 +41,7 @@ import java.util.Collections;
* TopScoreDocCollector}) will not properly collect hits
* with these scores.
*/
public abstract class Scorer extends DocIdSetIterator {
public abstract class Scorer extends DocsEnum {
/** the Scorer's parent Weight. in some cases this may be null */
// TODO can we clean this up?
protected final Weight weight;
@ -94,14 +96,6 @@ public abstract class Scorer extends DocIdSetIterator {
*/
public abstract float score() throws IOException;
/** Returns number of matches for the current document.
* This returns a float (not int) because
* SloppyPhraseScorer discounts its freq according to how
* "sloppy" the match was.
*
* @lucene.experimental */
public abstract float freq() throws IOException;
/** returns parent Weight
* @lucene.experimental
*/

View File

@ -43,6 +43,8 @@ final class SloppyPhraseScorer extends PhraseScorer {
private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
private int numMatches;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer);
@ -75,6 +77,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
return 0.0f;
}
float freq = 0.0f;
numMatches = 0;
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
int next = pq.top().position;
@ -85,6 +88,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
numMatches++;
}
pq.add(pp);
pp = pq.pop();
@ -99,6 +103,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
}
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
numMatches++;
}
return freq;
}
@ -483,6 +488,11 @@ final class SloppyPhraseScorer extends PhraseScorer {
return tg;
}
/**
 * Returns the number of phrase matches counted for the current document.
 * This is the integer match count (numMatches), which is reset to 0 and
 * incremented once for every match whose matchLength is within slop; it is
 * not the slop-weighted float frequency accumulated via
 * docScorer.computeSlopFactor(matchLength).
 */
@Override
public int freq() throws IOException {
return numMatches;
}
// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
// //if (min.doc != ?) return;
// ps.println();
@ -504,4 +514,5 @@ final class SloppyPhraseScorer extends PhraseScorer {
// }
// }
}

View File

@ -55,7 +55,7 @@ final class TermScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return docsEnum.freq();
}

View File

@ -103,12 +103,14 @@ public class PayloadTermQuery extends SpanTermQuery {
}
doc = spans.doc();
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity);
more = spans.next();// this moves positions to the next match in this
@ -179,7 +181,7 @@ public class PayloadTermQuery extends SpanTermQuery {
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -33,6 +33,7 @@ public class SpanScorer extends Scorer {
protected int doc;
protected float freq;
protected int numMatches;
protected final Similarity.SloppySimScorer docScorer;
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
@ -77,9 +78,11 @@ public class SpanScorer extends Scorer {
}
doc = spans.doc();
freq = 0.0f;
numMatches = 0;
do {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
more = spans.next();
} while (more && (doc == spans.doc()));
return true;
@ -94,7 +97,14 @@ public class SpanScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return numMatches;
}
/** Returns the intermediate "sloppy freq" adjusted for edit distance:
* the sum of docScorer.computeSlopFactor(matchLength) over the spans
* matched in the current document. Unlike freq(), which returns the
* plain match count (numMatches), this value is a float.
* @lucene.internal */
// only public so .payloads can see it.
public float sloppyFreq() throws IOException {
return freq;
}
}

View File

@ -92,11 +92,11 @@ public class SpanWeight extends Weight {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
SpanScorer scorer = (SpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Norm;
import org.apache.lucene.search.similarities.Similarity;
@ -199,6 +201,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
// Stub implementation: always throws UnsupportedOperationException(UNSUPPORTED_MSG),
// like the other methods of this class visible here.
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileQuery extends Query {
@ -227,7 +234,7 @@ final class JustCompileSearch {
}
@Override
public float freq() {
public int freq() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}

View File

@ -81,7 +81,7 @@ public class TestBooleanScorer extends LuceneTestCase
Scorer[] scorers = new Scorer[] {new Scorer(weight) {
private int doc = -1;
@Override public float score() { return 0; }
@Override public float freq() { return 0; }
@Override public int freq() { return 0; }
@Override public int docID() { return doc; }
@Override public int nextDoc() {

View File

@ -36,7 +36,7 @@ public class TestCachingCollector extends LuceneTestCase {
public float score() throws IOException { return 0; }
@Override
public float freq() throws IOException { return 0; }
public int freq() throws IOException { return 0; }
@Override
public int docID() { return 0; }

View File

@ -36,7 +36,7 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase {
return idx == scores.length ? Float.NaN : scores[idx];
}
@Override public float freq() {
@Override public int freq() {
return 1;
}

View File

@ -44,7 +44,7 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase {
return idx == scores.length ? Float.NaN : scores[idx++];
}
@Override public float freq() throws IOException {
@Override public int freq() throws IOException {
return 1;
}

View File

@ -99,7 +99,7 @@ public class BlockGroupingCollector extends Collector {
}
@Override
public float freq() {
public int freq() {
throw new UnsupportedOperationException(); // TODO: wtf does this class do?
}

View File

@ -219,7 +219,7 @@ class TermsIncludingScoreQuery extends Query {
}
@Override
public float freq() {
public int freq() {
return 1;
}
}
@ -318,7 +318,7 @@ class TermsIncludingScoreQuery extends Query {
return scores[currentDoc];
}
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}

View File

@ -169,7 +169,7 @@ public class ToChildBlockJoinQuery extends Query {
private final Bits acceptDocs;
private float parentScore;
private float parentFreq = 1;
private int parentFreq = 1;
private int childDoc = -1;
private int parentDoc;
@ -258,7 +258,7 @@ public class ToChildBlockJoinQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return parentFreq;
}

View File

@ -329,7 +329,7 @@ public class ToParentBlockJoinCollector extends Collector {
}
@Override
public float freq() {
public int freq() {
return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw?
}

View File

@ -218,7 +218,7 @@ public class ToParentBlockJoinQuery extends Query {
private int parentDoc = -1;
private int prevParentDoc;
private float parentScore;
private float parentFreq;
private int parentFreq;
private int nextChildDoc;
private int[] pendingChildDocs = new int[5];
@ -300,11 +300,10 @@ public class ToParentBlockJoinQuery extends Query {
}
float totalScore = 0;
float totalFreq = 0;
float maxScore = Float.NEGATIVE_INFINITY;
float maxFreq = 0;
childDocUpto = 0;
parentFreq = 0;
do {
//System.out.println(" c=" + nextChildDoc);
@ -318,12 +317,11 @@ public class ToParentBlockJoinQuery extends Query {
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
final float childFreq = childScorer.freq();
final int childFreq = childScorer.freq();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
maxFreq = Math.max(childFreq, maxFreq);
totalScore += childScore;
totalFreq += childFreq;
parentFreq += childFreq;
}
childDocUpto++;
nextChildDoc = childScorer.nextDoc();
@ -335,15 +333,12 @@ public class ToParentBlockJoinQuery extends Query {
switch(scoreMode) {
case Avg:
parentScore = totalScore / childDocUpto;
parentFreq = totalFreq / childDocUpto;
break;
case Max:
parentScore = maxScore;
parentFreq = maxFreq;
break;
case Total:
parentScore = totalScore;
parentFreq = totalFreq;
break;
case None:
break;
@ -365,7 +360,7 @@ public class ToParentBlockJoinQuery extends Query {
}
@Override
public float freq() {
public int freq() {
return parentFreq;
}

View File

@ -327,7 +327,7 @@ public class CustomScoreQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return subQueryScorer.freq();
}

View File

@ -167,7 +167,7 @@ public class BoostedQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return scorer.freq();
}

View File

@ -159,7 +159,7 @@ public class FunctionQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}

View File

@ -88,7 +88,7 @@ public class ValueSourceScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}
}

View File

@ -482,7 +482,7 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}

View File

@ -534,7 +534,7 @@ class JoinQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}

View File

@ -188,7 +188,7 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 1;
}