LUCENE-4933: collapse Exact/SloppySimScorer into SimScorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1490971 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-06-08 12:42:36 +00:00
parent d08b2810fd
commit 290ae62a44
31 changed files with 101 additions and 400 deletions

View File

@ -90,6 +90,11 @@ Changes in backwards compatibility policy
categories. You should set TakmiSampleFixer on SamplingParams if required (but
notice that this means slower search). (Rob Audenaerde, Gilad Barkai, Shai Erera)
* LUCENE-4933: Replace ExactSimScorer/SloppySimScorer with just SimScorer. Previously
there were 2 implementations as a performance hack to support tableization of
sqrt(), but this caching is removed, as sqrt is implemented in hardware with modern
JVMs and it's faster not to cache. (Robert Muir)
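For reference, the collapsed API that the hunks below converge on looks roughly like the following condensed sketch (javadoc trimmed; signatures taken from the Similarity.java diff further down, which remains the authoritative source):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.CollectionStatistics;
    import org.apache.lucene.search.Explanation;
    import org.apache.lucene.search.TermStatistics;
    import org.apache.lucene.util.BytesRef;

    // Condensed sketch of the collapsed Similarity API after LUCENE-4933.
    public abstract class Similarity {

      // Per-query statistics, computed once; unchanged by this commit.
      public static abstract class SimWeight {
        public abstract float getValueForNormalization();
        public abstract void normalize(float queryNorm, float topLevelBoost);
      }

      public abstract SimWeight computeWeight(float queryBoost,
          CollectionStatistics collectionStats, TermStatistics... termStats);

      // Single per-segment factory, replacing exactSimScorer()/sloppySimScorer().
      public abstract SimScorer simScorer(SimWeight weight, AtomicReaderContext context)
          throws IOException;

      // One scorer for TermQuery, PhraseQuery and SpanQuery alike; freq is now a float.
      public static abstract class SimScorer {
        public abstract float score(int doc, float freq);
        public abstract float computeSlopFactor(int distance);
        public abstract float computePayloadFactor(int doc, int start, int end, BytesRef payload);

        public Explanation explain(int doc, Explanation freq) {
          Explanation result = new Explanation(score(doc, freq.getValue()),
              "score(doc=" + doc + ",freq=" + freq.getValue() + "), with freq of:");
          result.addDetail(freq);
          return result;
        }
      }
    }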
Bug Fixes
* LUCENE-4997: Internal test framework's tests are sensitive to previous
@ -130,6 +135,9 @@ Bug Fixes
multi-valued fields exceed maxLength (Tomás Fernández Löbbe
via Mike McCandless)
* LUCENE-4933: SweetSpotSimilarity didn't apply its tf function to some
queries (SloppyPhraseQuery, SpanQueries). (Robert Muir)
Optimizations
* LUCENE-4936: Improve numeric doc values compression in case all values share

View File

@ -56,10 +56,10 @@ final class ExactPhraseScorer extends Scorer {
private int docID = -1;
private int freq;
private final Similarity.ExactSimScorer docScorer;
private final Similarity.SimScorer docScorer;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.ExactSimScorer docScorer) throws IOException {
Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;

View File

@ -31,7 +31,7 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@ -245,14 +245,14 @@ public class MultiPhraseQuery extends Query {
}
if (slop == 0) {
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
} else {
return s;
}
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}
@ -263,7 +263,7 @@ public class MultiPhraseQuery extends Query {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -33,7 +33,7 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@ -282,7 +282,7 @@ public class PhraseQuery extends Query {
}
if (slop == 0) { // optimize exact case
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
} else {
@ -290,7 +290,7 @@ public class PhraseQuery extends Query {
}
} else {
return
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}
@ -306,7 +306,7 @@ public class PhraseQuery extends Query {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -34,7 +34,7 @@ final class SloppyPhraseScorer extends Scorer {
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
private final Similarity.SloppySimScorer docScorer;
private final Similarity.SimScorer docScorer;
private final int slop;
private final int numPostings;
@ -52,7 +52,7 @@ final class SloppyPhraseScorer extends Scorer {
private final long cost;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) {
int slop, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
this.slop = slop;

View File

@ -29,7 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@ -84,7 +84,7 @@ public class TermQuery extends Query {
}
DocsEnum docs = termsEnum.docs(acceptDocs, null);
assert docs != null;
return new TermScorer(this, docs, similarity.exactSimScorer(stats, context));
return new TermScorer(this, docs, similarity.simScorer(stats, context));
}
/**
@ -116,7 +116,7 @@ public class TermQuery extends Query {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));

View File

@ -26,7 +26,7 @@ import org.apache.lucene.search.similarities.Similarity;
*/
final class TermScorer extends Scorer {
private final DocsEnum docsEnum;
private final Similarity.ExactSimScorer docScorer;
private final Similarity.SimScorer docScorer;
/**
* Construct a <code>TermScorer</code>.
@ -36,10 +36,10 @@ final class TermScorer extends Scorer {
* @param td
* An iterator over the documents matching the <code>Term</code>.
* @param docScorer
* The <code>Similarity.ExactSimScorer</code> implementation
* The <code>Similarity.SimScorer</code> implementation
* to be used for score computations.
*/
TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) {
TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
this.docsEnum = td;

View File

@ -441,9 +441,8 @@ on the built-in available scoring models and extending or changing Similarity.
explain(AtomicReaderContext context, int doc)} &mdash; Provide a means for explaining why a given document was
scored the way it was.
Typically a weight such as TermWeight
that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementations:
{@link org.apache.lucene.search.similarities.Similarity.ExactSimScorer#explain(int, Explanation) ExactSimScorer#explain(int doc, Explanation freq)},
and {@link org.apache.lucene.search.similarities.Similarity.SloppySimScorer#explain(int, Explanation) SloppySimScorer#explain(int doc, Explanation freq)}
that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementation:
{@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(int, Explanation) SimScorer#explain(int doc, Explanation freq)}.
</li>
</li>
</ol>
@ -468,7 +467,7 @@ on the built-in available scoring models and extending or changing Similarity.
{@link org.apache.lucene.search.Scorer#score score()} &mdash; Return the score of the
current document. This value can be determined in any appropriate way for an application. For instance, the
{@link org.apache.lucene.search.TermScorer TermScorer} simply defers to the configured Similarity:
{@link org.apache.lucene.search.similarities.Similarity.ExactSimScorer#score(int, int) ExactSimScorer.score(int doc, int freq)}.
{@link org.apache.lucene.search.similarities.Similarity.SimScorer#score(int, float) SimScorer.score(int doc, float freq)}.
</li>
<li>
{@link org.apache.lucene.search.Scorer#freq freq()} &mdash; Returns the number of matches

View File

@ -25,7 +25,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.NearSpansOrdered;
import org.apache.lucene.search.spans.NearSpansUnordered;
import org.apache.lucene.search.spans.SpanNearQuery;
@ -53,7 +53,7 @@ import java.util.Iterator;
* <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
*
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
*/
public class PayloadNearQuery extends SpanNearQuery {
protected String fieldName;
@ -151,7 +151,7 @@ public class PayloadNearQuery extends SpanNearQuery {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.sloppySimScorer(stats, context));
similarity, similarity.simScorer(stats, context));
}
@Override
@ -161,7 +161,7 @@ public class PayloadNearQuery extends SpanNearQuery {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
@ -190,7 +190,7 @@ public class PayloadNearQuery extends SpanNearQuery {
private int payloadsSeen;
protected PayloadNearSpanScorer(Spans spans, Weight weight,
Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException {
Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
this.spans = spans;
}
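The payload queries are the main consumers of the computePayloadFactor hook that survives on the unified SimScorer. The aggregation itself happens in PayloadNearSpanScorer.processPayloads, which this hunk does not show; roughly, and treating the PayloadFunction call and the local names as assumptions rather than a quote of the commit, it does something like:

    // Assumed sketch of the per-payload aggregation (not shown in this hunk): fold the
    // similarity's payload factor for each collected payload into a running score.
    for (byte[] thePayload : collectedPayloads) {          // collectedPayloads: hypothetical local
      BytesRef payload = new BytesRef(thePayload);
      payloadScore = function.currentScore(doc, fieldName, spans.start(), spans.end(),
          payloadsSeen, payloadScore,
          docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
      payloadsSeen++;
    }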

View File

@ -27,7 +27,7 @@ import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -49,7 +49,7 @@ import java.io.IOException;
* which returns 1 by default.
* <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
**/
public class PayloadTermQuery extends SpanTermQuery {
protected PayloadFunction function;
@ -82,7 +82,7 @@ public class PayloadTermQuery extends SpanTermQuery {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.sloppySimScorer(stats, context));
this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
@ -91,7 +91,7 @@ public class PayloadTermQuery extends SpanTermQuery {
protected int payloadsSeen;
private final TermSpans termSpans;
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
termSpans = spans;
}
@ -182,7 +182,7 @@ public class PayloadTermQuery extends SpanTermQuery {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -212,80 +212,18 @@ public class BM25Similarity extends Similarity {
}
@Override
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
final NumericDocValues norms = context.reader().getNormValues(bm25stats.field);
return norms == null
? new ExactBM25DocScorerNoNorms(bm25stats)
: new ExactBM25DocScorer(bm25stats, norms);
}
@Override
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
return new SloppyBM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
}
private class ExactBM25DocScorer extends ExactSimScorer {
private final BM25Stats stats;
private final float weightValue;
private final NumericDocValues norms;
private final float[] cache;
ExactBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
assert norms != null;
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
this.cache = stats.cache;
this.norms = norms;
}
@Override
public float score(int doc, int freq) {
return weightValue * freq / (freq + cache[(byte)norms.get(doc) & 0xFF]);
}
@Override
public Explanation explain(int doc, Explanation freq) {
return explainScore(doc, freq, stats, norms);
}
}
/** there are no norms, we act as if b=0 */
private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
private final BM25Stats stats;
private final float weightValue;
private static final int SCORE_CACHE_SIZE = 32;
private float[] scoreCache = new float[SCORE_CACHE_SIZE];
ExactBM25DocScorerNoNorms(BM25Stats stats) {
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = weightValue * i / (i + k1);
}
@Override
public float score(int doc, int freq) {
// TODO: maybe score cache is more trouble than it's worth?
return freq < SCORE_CACHE_SIZE // check cache
? scoreCache[freq] // cache hit
: weightValue * freq / (freq + k1); // cache miss
}
@Override
public Explanation explain(int doc, Explanation freq) {
return explainScore(doc, freq, stats, null);
}
}
private class SloppyBM25DocScorer extends SloppySimScorer {
private class BM25DocScorer extends SimScorer {
private final BM25Stats stats;
private final float weightValue; // boost * idf * (k1 + 1)
private final NumericDocValues norms;
private final float[] cache;
SloppyBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1);
this.cache = stats.cache;
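The hunk is truncated before the merged scorer's score() method. Presumably (a sketch inferred from the two removed scorers above, not a verbatim quote of the commit) the unified BM25DocScorer folds the no-norms case in along these lines:

      @Override
      public float score(int doc, float freq) {
        // if there are no norms, act as if b=0 (the role the removed no-norms scorer played)
        float norm = (norms == null) ? k1 : cache[(byte) norms.get(doc) & 0xFF];
        return weightValue * freq / (freq + norm);
      }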

View File

@ -57,60 +57,25 @@ public class MultiSimilarity extends Similarity {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
SimScorer subScorers[] = new SimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
subScorers[i] = sims[i].simScorer(((MultiStats)stats).subStats[i], context);
}
return new MultiExactDocScorer(subScorers);
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
}
return new MultiSloppyDocScorer(subScorers);
return new MultiSimScorer(subScorers);
}
static class MultiExactDocScorer extends ExactSimScorer {
private final ExactSimScorer subScorers[];
static class MultiSimScorer extends SimScorer {
private final SimScorer subScorers[];
MultiExactDocScorer(ExactSimScorer subScorers[]) {
this.subScorers = subScorers;
}
@Override
public float score(int doc, int freq) {
float sum = 0.0f;
for (ExactSimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq);
}
return sum;
}
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
for (ExactSimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq));
}
return expl;
}
}
static class MultiSloppyDocScorer extends SloppySimScorer {
private final SloppySimScorer subScorers[];
MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
MultiSimScorer(SimScorer subScorers[]) {
this.subScorers = subScorers;
}
@Override
public float score(int doc, float freq) {
float sum = 0.0f;
for (SloppySimScorer subScorer : subScorers) {
for (SimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq);
}
return sum;
@ -119,7 +84,7 @@ public class MultiSimilarity extends Similarity {
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
for (SloppySimScorer subScorer : subScorers) {
for (SimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq));
}
return expl;

View File

@ -54,15 +54,9 @@ public abstract class PerFieldSimilarityWrapper extends Similarity {
}
@Override
public final ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
public final SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
return perFieldWeight.delegate.exactSimScorer(perFieldWeight.delegateWeight, context);
}
@Override
public final SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
return perFieldWeight.delegate.sloppySimScorer(perFieldWeight.delegateWeight, context);
return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}
/**

View File

@ -88,10 +88,8 @@ import org.apache.lucene.util.SmallFloat; // javadoc
* is called for each query leaf node, {@link Similarity#queryNorm(float)} is called for the top-level
* query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
* <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
* (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
* {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
* SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
* <li>For each segment in the index, the Query creates a {@link #simScorer(SimWeight, AtomicReaderContext)}.
* The score() method is called for each matching document.
* </ol>
* <p>
* <a name="explaintime"/>
@ -166,76 +164,31 @@ public abstract class Similarity {
* @return SimWeight object with the information this Similarity needs to score a query.
*/
public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
/**
* Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
* @return ExactSimScorer for scoring documents across <code>context</code>
* @throws IOException if there is a low-level I/O error
*/
public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
/**
* Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
* Creates a new {@link Similarity.SimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
* @return SloppySimScorer for scoring documents across <code>context</code>
* @throws IOException if there is a low-level I/O error
*/
public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
public abstract SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
/**
* API for scoring exact queries such as {@link TermQuery} and
* exact {@link PhraseQuery}.
* <p>
* Frequencies are integers (the term or phrase frequency within the document)
*/
public static abstract class ExactSimScorer {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
public ExactSimScorer() {}
/**
* Score a single document
* @param doc document id
* @param freq term frequency
* @return document's score
*/
public abstract float score(int doc, int freq);
/**
* Explain the score for a single document
* @param doc document id
* @param freq Explanation of how the term frequency was computed
* @return document's score
*/
public Explanation explain(int doc, Explanation freq) {
Explanation result = new Explanation(score(doc, (int)freq.getValue()),
"score(doc=" + doc + ",freq=" + freq.getValue() +"), with freq of:");
result.addDetail(freq);
return result;
}
}
/**
* API for scoring "sloppy" queries such as {@link SpanQuery} and
* sloppy {@link PhraseQuery}.
* API for scoring "sloppy" queries such as {@link TermQuery},
* {@link SpanQuery}, and {@link PhraseQuery}.
* <p>
* Frequencies are floating-point values: an approximate
* within-document frequency adjusted for "sloppiness" by
* {@link SloppySimScorer#computeSlopFactor(int)}.
* {@link SimScorer#computeSlopFactor(int)}.
*/
public static abstract class SloppySimScorer {
public static abstract class SimScorer {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
public SloppySimScorer() {}
public SimScorer() {}
/**
* Score a single document

View File

@ -190,38 +190,20 @@ public abstract class SimilarityBase extends Similarity {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
SimScorer subScorers[] = new SimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
return new MultiSimilarity.MultiExactDocScorer(subScorers);
return new MultiSimilarity.MultiSimScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
return new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
return new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
}
@ -277,46 +259,17 @@ public abstract class SimilarityBase extends Similarity {
// --------------------------------- Classes ---------------------------------
/** Delegates the {@link #score(int, int)} and
* {@link #explain(int, Explanation)} methods to
* {@link SimilarityBase#score(BasicStats, float, float)} and
* {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
* respectively.
*/
private class BasicExactDocScorer extends ExactSimScorer {
private final BasicStats stats;
private final NumericDocValues norms;
BasicExactDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
}
@Override
public float score(int doc, int freq) {
// We have to supply something in case norms are omitted
return SimilarityBase.this.score(stats, freq,
norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
}
@Override
public Explanation explain(int doc, Explanation freq) {
return SimilarityBase.this.explain(stats, doc, freq,
norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
}
}
/** Delegates the {@link #score(int, float)} and
* {@link #explain(int, Explanation)} methods to
* {@link SimilarityBase#score(BasicStats, float, float)} and
* {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
* respectively.
*/
private class BasicSloppyDocScorer extends SloppySimScorer {
private class BasicSimScorer extends SimScorer {
private final BasicStats stats;
private final NumericDocValues norms;
BasicSloppyDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
}

View File

@ -562,25 +562,6 @@ public abstract class TFIDFSimilarity extends Similarity {
@Override
public abstract float queryNorm(float sumOfSquaredWeights);
/** Computes a score factor based on a term or phrase's frequency in a
* document. This value is multiplied by the {@link #idf(long, long)}
* factor for each term in the query and these products are then summed to
* form the initial score for a document.
*
* <p>Terms and phrases repeated in a document indicate the topic of the
* document, so implementations of this method usually return larger values
* when <code>freq</code> is large, and smaller values when <code>freq</code>
* is small.
*
* <p>The default implementation calls {@link #tf(float)}.
*
* @param freq the frequency of a term within a document
* @return a score factor based on a term's within-document frequency
*/
public float tf(int freq) {
return tf((float)freq);
}
/** Computes a score factor based on a term or phrase's frequency in a
* document. This value is multiplied by the {@link #idf(long, long)}
* factor for each term in the query and these products are then summed to
@ -655,7 +636,7 @@ public abstract class TFIDFSimilarity extends Similarity {
/** Computes a score factor based on a term's document frequency (the number
* of documents which contain the term). This value is multiplied by the
* {@link #tf(int)} factor for each term in the query and these products are
* {@link #tf(float)} factor for each term in the query and these products are
* then summed to form the initial score for a document.
*
* <p>Terms that occur in fewer documents are better indicators of topic, so
@ -755,49 +736,17 @@ public abstract class TFIDFSimilarity extends Similarity {
}
@Override
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
return new ExactTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
}
@Override
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
return new SloppyTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field));
}
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
private final class ExactTFIDFDocScorer extends ExactSimScorer {
private final class TFIDFSimScorer extends SimScorer {
private final IDFStats stats;
private final float weightValue;
private final NumericDocValues norms;
ExactTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
this.norms = norms;
}
@Override
public float score(int doc, int freq) {
final float raw = tf(freq)*weightValue; // compute tf(f)*weight
return norms == null ? raw : raw * decodeNormValue((byte)norms.get(doc)); // normalize for field
}
@Override
public Explanation explain(int doc, Explanation freq) {
return explainScore(doc, freq, stats, norms);
}
}
private final class SloppyTFIDFDocScorer extends SloppySimScorer {
private final IDFStats stats;
private final float weightValue;
private final NumericDocValues norms;
SloppyTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
TFIDFSimScorer(IDFStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
this.norms = norms;
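Again the hunk stops before score(); presumably the merged TFIDFSimScorer mirrors the removed exact scorer above, except that it now feeds the float frequency straight to tf(float) (a sketch, not the committed code):

      @Override
      public float score(int doc, float freq) {
        final float raw = tf(freq) * weightValue;                                   // tf(f) * weight
        return norms == null ? raw : raw * decodeNormValue((byte) norms.get(doc));  // field norm
      }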

View File

@ -34,9 +34,9 @@ public class SpanScorer extends Scorer {
protected int doc;
protected float freq;
protected int numMatches;
protected final Similarity.SloppySimScorer docScorer;
protected final Similarity.SimScorer docScorer;
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
throws IOException {
super(weight);
this.docScorer = docScorer;

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@ -86,7 +86,7 @@ public class SpanWeight extends Weight {
if (stats == null) {
return null;
} else {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
}
}
@ -97,7 +97,7 @@ public class SpanWeight extends Weight {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -112,12 +112,7 @@ public class TestCustomNorms extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -179,12 +179,7 @@ public class TestNorms extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -110,12 +110,7 @@ public class TestUniqueTermCount extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -270,12 +270,7 @@ final class JustCompileSearch {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) {
public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}

View File

@ -109,18 +109,8 @@ public class TestConjunctions extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return new ExactSimScorer() {
@Override
public float score(int doc, int freq) {
return freq;
}
};
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return new SloppySimScorer() {
public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return new SimScorer() {
@Override
public float score(int doc, float freq) {
return freq;

View File

@ -156,34 +156,11 @@ public class TestDocValuesScoring extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final ExactSimScorer sub = sim.exactSimScorer(stats, context);
final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);
return new ExactSimScorer() {
@Override
public float score(int doc, int freq) {
return values.get(doc) * sub.score(doc, freq);
}
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation boostExplanation = new Explanation(values.get(doc), "indexDocValue(" + boostField + ")");
Explanation simExplanation = sub.explain(doc, freq);
Explanation expl = new Explanation(boostExplanation.getValue() * simExplanation.getValue(), "product of:");
expl.addDetail(boostExplanation);
expl.addDetail(simExplanation);
return expl;
}
};
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final SloppySimScorer sub = sim.sloppySimScorer(stats, context);
public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final SimScorer sub = sim.simScorer(stats, context);
final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);
return new SloppySimScorer() {
return new SimScorer() {
@Override
public float score(int doc, float freq) {
return values.get(doc) * sub.score(doc, freq);

View File

@ -37,7 +37,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -274,7 +274,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
final int maxDoc;
final Set<Long> ords = new HashSet<Long>();
final ExactSimScorer[] sims;
final SimScorer[] sims;
final int minNrShouldMatch;
double score = Float.NaN;
@ -285,7 +285,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
this.maxDoc = reader.maxDoc();
BooleanQuery bq = (BooleanQuery) weight.getQuery();
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
this.sims = new ExactSimScorer[(int)dv.getValueCount()];
this.sims = new SimScorer[(int)dv.getValueCount()];
for (BooleanClause clause : bq.getClauses()) {
assert !clause.isProhibited();
assert !clause.isRequired();
@ -300,7 +300,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
searcher.termStatistics(term, context));
w.getValueForNormalization(); // ignored
w.normalize(1F, 1F);
sims[(int)ord] = weight.similarity.exactSimScorer(w, reader.getContext());
sims[(int)ord] = weight.similarity.simScorer(w, reader.getContext());
}
}
}

View File

@ -148,7 +148,7 @@ final class JustCompileSearchSpans {
static final class JustCompileSpanScorer extends SpanScorer {
protected JustCompileSpanScorer(Spans spans, Weight weight,
Similarity.SloppySimScorer docScorer) throws IOException {
Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
}

View File

@ -158,7 +158,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
* @see #baselineTf
*/
@Override
public float tf(int freq) {
public float tf(float freq) {
return baselineTf(freq);
}
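Since tf now takes a float, a single override covers exact term/phrase frequencies as well as sloppy and span frequencies, which is the bug the CHANGES.txt entry above refers to. A minimal usage sketch (the IndexSearcher is assumed; the anonymous subclass mirrors the test change further down):

    SweetSpotSimilarity ss = new SweetSpotSimilarity() {
      @Override
      public float tf(float freq) {     // one override now reaches SloppyPhraseQuery/SpanQuery too
        return hyperbolicTf(freq);
      }
    };
    searcher.setSimilarity(ss);         // 'searcher' is an assumed IndexSearcher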

View File

@ -98,13 +98,8 @@ public abstract class SorterTestBase extends LuceneTestCase {
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return in.exactSimScorer(weight, context);
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return in.sloppySimScorer(weight, context);
public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
return in.simScorer(weight, context);
}
}

View File

@ -246,7 +246,7 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
SweetSpotSimilarity ss = new SweetSpotSimilarity() {
@Override
public float tf(int freq) {
public float tf(float freq) {
return hyperbolicTf(freq);
}
};

View File

@ -29,7 +29,7 @@ import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#tf(int)}
* Function that returns {@link TFIDFSimilarity#tf(float)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be

View File

@ -180,7 +180,7 @@ public class SweetSpotSimilarityFactory extends DefaultSimilarityFactory {
private static final class HyperbolicSweetSpotSimilarity
extends SweetSpotSimilarity {
@Override
public float tf(int freq) {
public float tf(float freq) {
return hyperbolicTf(freq);
}
};