mirror of https://github.com/apache/lucene.git
LUCENE-3749: Similarity.java javadocs and simplifications for 4.0
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1239941 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d80355fd21
commit
8fbd9d7673
|
@ -28,7 +28,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
|
||||
import org.apache.lucene.search.TermQuery.TermWeight;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
@ -362,7 +362,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
if (termsEnum == null) {
|
||||
return null;
|
||||
}
|
||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
||||
final ExactSimScorer docScorer = weight.createDocScorer(context);
|
||||
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
|
||||
if (docsAndFreqsEnum == null) {
|
||||
// TODO: we could carry over TermState from the
|
||||
|
@ -394,7 +394,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
if (termsEnum == null) {
|
||||
return null;
|
||||
}
|
||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
||||
final ExactSimScorer docScorer = weight.createDocScorer(context);
|
||||
docsAndFreqs[i] = new DocsAndFreqs(null,
|
||||
termsEnum.docs(acceptDocs, null, false),
|
||||
termsEnum.docFreq(), docScorer);
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/** Scorer for conjunctions, sets of terms, all of which are required. */
|
||||
|
@ -100,10 +100,10 @@ class ConjunctionTermScorer extends Scorer {
|
|||
final DocsEnum docsAndFreqs;
|
||||
final DocsEnum docs;
|
||||
final int docFreq;
|
||||
final ExactDocScorer docScorer;
|
||||
final ExactSimScorer docScorer;
|
||||
int doc = -1;
|
||||
|
||||
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) {
|
||||
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactSimScorer docScorer) {
|
||||
this.docsAndFreqs = docsAndFreqs;
|
||||
this.docs = docs;
|
||||
this.docFreq = docFreq;
|
||||
|
|
|
@ -55,10 +55,10 @@ final class ExactPhraseScorer extends Scorer {
|
|||
private int docID = -1;
|
||||
private int freq;
|
||||
|
||||
private final Similarity.ExactDocScorer docScorer;
|
||||
private final Similarity.ExactSimScorer docScorer;
|
||||
|
||||
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.ExactDocScorer docScorer) throws IOException {
|
||||
Similarity.ExactSimScorer docScorer) throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
|
||||
final class MatchOnlyTermScorer extends Scorer {
|
||||
private final DocsEnum docsEnum;
|
||||
private final Similarity.ExactDocScorer docScorer;
|
||||
private final Similarity.ExactSimScorer docScorer;
|
||||
|
||||
/**
|
||||
* Construct a <code>MatchOnlyTermScorer</code>.
|
||||
|
@ -39,10 +39,10 @@ final class MatchOnlyTermScorer extends Scorer {
|
|||
* @param td
|
||||
* An iterator over the documents matching the <code>Term</code>.
|
||||
* @param docScorer
|
||||
* The </code>Similarity.ExactDocScorer</code> implementation
|
||||
* The <code>Similarity.ExactSimScorer</code> implementation
|
||||
* to be used for score computations.
|
||||
*/
|
||||
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
|
||||
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.docsEnum = td;
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -137,7 +137,7 @@ public class MultiPhraseQuery extends Query {
|
|||
|
||||
private class MultiPhraseWeight extends Weight {
|
||||
private final Similarity similarity;
|
||||
private final Similarity.Stats stats;
|
||||
private final Similarity.SimWeight stats;
|
||||
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||
|
||||
public MultiPhraseWeight(IndexSearcher searcher)
|
||||
|
@ -157,8 +157,9 @@ public class MultiPhraseQuery extends Query {
|
|||
allTermStats.add(searcher.termStatistics(term, termContext));
|
||||
}
|
||||
}
|
||||
stats = similarity.computeStats(searcher.collectionStatistics(field),
|
||||
getBoost(), allTermStats.toArray(new TermStatistics[allTermStats.size()]));
|
||||
stats = similarity.computeWeight(getBoost(),
|
||||
searcher.collectionStatistics(field),
|
||||
allTermStats.toArray(new TermStatistics[allTermStats.size()]));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -246,14 +247,14 @@ public class MultiPhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) {
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context));
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
|
||||
if (s.noDocs) {
|
||||
return null;
|
||||
} else {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context));
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -264,7 +265,7 @@ public class MultiPhraseQuery extends Query {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context);
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -183,7 +183,7 @@ public class PhraseQuery extends Query {
|
|||
|
||||
private class PhraseWeight extends Weight {
|
||||
private final Similarity similarity;
|
||||
private final Similarity.Stats stats;
|
||||
private final Similarity.SimWeight stats;
|
||||
private transient TermContext states[];
|
||||
|
||||
public PhraseWeight(IndexSearcher searcher)
|
||||
|
@ -197,7 +197,7 @@ public class PhraseQuery extends Query {
|
|||
states[i] = TermContext.build(context, term, true);
|
||||
termStats[i] = searcher.termStatistics(term, states[i]);
|
||||
}
|
||||
stats = similarity.computeStats(searcher.collectionStatistics(field), getBoost(), termStats);
|
||||
stats = similarity.computeWeight(getBoost(), searcher.collectionStatistics(field), termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -258,7 +258,7 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) { // optimize exact case
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context));
|
||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
|
||||
if (s.noDocs) {
|
||||
return null;
|
||||
} else {
|
||||
|
@ -266,7 +266,7 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
} else {
|
||||
return
|
||||
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context));
|
||||
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -282,7 +282,7 @@ public class PhraseQuery extends Query {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context);
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
|
|
@ -36,10 +36,10 @@ abstract class PhraseScorer extends Scorer {
|
|||
|
||||
private float freq; //phrase frequency in current doc as computed by phraseFreq().
|
||||
|
||||
final Similarity.SloppyDocScorer docScorer;
|
||||
final Similarity.SloppySimScorer docScorer;
|
||||
|
||||
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.SloppyDocScorer docScorer) {
|
||||
Similarity.SloppySimScorer docScorer) {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
|
||||
|
||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
int slop, Similarity.SloppyDocScorer docScorer) {
|
||||
int slop, Similarity.SloppySimScorer docScorer) {
|
||||
super(weight, postings, docScorer);
|
||||
this.slop = slop;
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.index.IndexReaderContext;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -45,7 +45,7 @@ public class TermQuery extends Query {
|
|||
|
||||
final class TermWeight extends Weight {
|
||||
private final Similarity similarity;
|
||||
private final Similarity.Stats stats;
|
||||
private final Similarity.SimWeight stats;
|
||||
private final TermContext termStates;
|
||||
|
||||
public TermWeight(IndexSearcher searcher, TermContext termStates)
|
||||
|
@ -53,9 +53,9 @@ public class TermQuery extends Query {
|
|||
assert termStates != null : "TermContext must not be null";
|
||||
this.termStates = termStates;
|
||||
this.similarity = searcher.getSimilarityProvider().get(term.field());
|
||||
this.stats = similarity.computeStats(
|
||||
searcher.collectionStatistics(term.field()),
|
||||
this.stats = similarity.computeWeight(
|
||||
getBoost(),
|
||||
searcher.collectionStatistics(term.field()),
|
||||
searcher.termStatistics(term, termStates));
|
||||
}
|
||||
|
||||
|
@ -95,10 +95,10 @@ public class TermQuery extends Query {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates an {@link ExactDocScorer} for this {@link TermWeight}*/
|
||||
ExactDocScorer createDocScorer(AtomicReaderContext context)
|
||||
* Creates an {@link ExactSimScorer} for this {@link TermWeight}*/
|
||||
ExactSimScorer createDocScorer(AtomicReaderContext context)
|
||||
throws IOException {
|
||||
return similarity.exactDocScorer(stats, term.field(), context);
|
||||
return similarity.exactSimScorer(stats, context);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -130,7 +130,7 @@ public class TermQuery extends Query {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context);
|
||||
ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
*/
|
||||
final class TermScorer extends Scorer {
|
||||
private final DocsEnum docsEnum;
|
||||
private final Similarity.ExactDocScorer docScorer;
|
||||
private final Similarity.ExactSimScorer docScorer;
|
||||
|
||||
/**
|
||||
* Construct a <code>TermScorer</code>.
|
||||
|
@ -36,10 +36,10 @@ final class TermScorer extends Scorer {
|
|||
* @param td
|
||||
* An iterator over the documents matching the <code>Term</code>.
|
||||
* @param docScorer
|
||||
* The </code>Similarity.ExactDocScorer</code> implementation
|
||||
* The <code>Similarity.ExactSimScorer</code> implementation
|
||||
* to be used for score computations.
|
||||
*/
|
||||
TermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
|
||||
TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.docsEnum = td;
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.search.spans.NearSpansOrdered;
|
||||
import org.apache.lucene.search.spans.NearSpansUnordered;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
|
@ -53,7 +53,7 @@ import java.util.Iterator;
|
|||
* <p/>
|
||||
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
||||
*
|
||||
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||
*/
|
||||
public class PayloadNearQuery extends SpanNearQuery {
|
||||
protected String fieldName;
|
||||
|
@ -151,7 +151,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
|
||||
similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||
similarity, similarity.sloppySimScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -161,7 +161,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
Explanation expl = new Explanation();
|
||||
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
@ -189,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
|||
private int payloadsSeen;
|
||||
|
||||
protected PayloadNearSpanScorer(Spans spans, Weight weight,
|
||||
Similarity similarity, Similarity.SloppyDocScorer docScorer) throws IOException {
|
||||
Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
this.spans = spans;
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.search.ComplexExplanation;
|
|||
import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.search.spans.TermSpans;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
|
@ -49,7 +49,7 @@ import java.io.IOException;
|
|||
* which returns 1 by default.
|
||||
* <p/>
|
||||
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
||||
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||
**/
|
||||
public class PayloadTermQuery extends SpanTermQuery {
|
||||
protected PayloadFunction function;
|
||||
|
@ -82,7 +82,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
|
||||
this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||
this, similarity.sloppySimScorer(stats, context));
|
||||
}
|
||||
|
||||
protected class PayloadTermSpanScorer extends SpanScorer {
|
||||
|
@ -91,7 +91,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
protected int payloadsSeen;
|
||||
private final TermSpans termSpans;
|
||||
|
||||
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppyDocScorer docScorer) throws IOException {
|
||||
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
termSpans = spans;
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
Explanation expl = new Explanation();
|
||||
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
|
|
@ -153,7 +153,7 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||
|
||||
float avgdl = avgFieldLength(collectionStats);
|
||||
|
@ -163,23 +163,25 @@ public class BM25Similarity extends Similarity {
|
|||
for (int i = 0; i < cache.length; i++) {
|
||||
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
|
||||
}
|
||||
return new BM25Stats(idf, queryBoost, avgdl, cache);
|
||||
return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
final DocValues norms = context.reader().normValues(fieldName);
|
||||
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
BM25Stats bm25stats = (BM25Stats) stats;
|
||||
final DocValues norms = context.reader().normValues(bm25stats.field);
|
||||
return norms == null
|
||||
? new ExactBM25DocScorerNoNorms((BM25Stats)stats)
|
||||
: new ExactBM25DocScorer((BM25Stats)stats, norms);
|
||||
? new ExactBM25DocScorerNoNorms(bm25stats)
|
||||
: new ExactBM25DocScorer(bm25stats, norms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
return new SloppyBM25DocScorer((BM25Stats) stats, context.reader().normValues(fieldName));
|
||||
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
BM25Stats bm25stats = (BM25Stats) stats;
|
||||
return new SloppyBM25DocScorer(bm25stats, context.reader().normValues(bm25stats.field));
|
||||
}
|
||||
|
||||
private class ExactBM25DocScorer extends ExactDocScorer {
|
||||
private class ExactBM25DocScorer extends ExactSimScorer {
|
||||
private final BM25Stats stats;
|
||||
private final float weightValue;
|
||||
private final byte[] norms;
|
||||
|
@ -205,7 +207,7 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
|
||||
/** there are no norms, we act as if b=0 */
|
||||
private class ExactBM25DocScorerNoNorms extends ExactDocScorer {
|
||||
private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
|
||||
private final BM25Stats stats;
|
||||
private final float weightValue;
|
||||
private static final int SCORE_CACHE_SIZE = 32;
|
||||
|
@ -232,7 +234,7 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
}
|
||||
|
||||
private class SloppyBM25DocScorer extends SloppyDocScorer {
|
||||
private class SloppyBM25DocScorer extends SloppySimScorer {
|
||||
private final BM25Stats stats;
|
||||
private final float weightValue; // boost * idf * (k1 + 1)
|
||||
private final byte[] norms;
|
||||
|
@ -269,7 +271,7 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
|
||||
/** Collection statistics for the BM25 model. */
|
||||
private static class BM25Stats extends Stats {
|
||||
private static class BM25Stats extends SimWeight {
|
||||
/** BM25's idf */
|
||||
private final Explanation idf;
|
||||
/** The average document length. */
|
||||
|
@ -280,10 +282,13 @@ public class BM25Similarity extends Similarity {
|
|||
private float topLevelBoost;
|
||||
/** weight (idf * boost) */
|
||||
private float weight;
|
||||
/** field name, for pulling norms */
|
||||
private final String field;
|
||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
||||
private final float cache[];
|
||||
|
||||
BM25Stats(Explanation idf, float queryBoost, float avgdl, float cache[]) {
|
||||
BM25Stats(String field, Explanation idf, float queryBoost, float avgdl, float cache[]) {
|
||||
this.field = field;
|
||||
this.idf = idf;
|
||||
this.queryBoost = queryBoost;
|
||||
this.avgdl = avgdl;
|
||||
|
|
|
@ -23,7 +23,8 @@ import org.apache.lucene.index.Terms;
|
|||
* Stores all statistics commonly used by ranking methods.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class BasicStats extends Similarity.Stats {
|
||||
public class BasicStats extends Similarity.SimWeight {
|
||||
final String field;
|
||||
/** The number of documents. */
|
||||
protected long numberOfDocuments;
|
||||
/** The total number of tokens in the field. */
|
||||
|
@ -47,7 +48,8 @@ public class BasicStats extends Similarity.Stats {
|
|||
protected float totalBoost;
|
||||
|
||||
/** Constructor. Sets the query boost. */
|
||||
public BasicStats(float queryBoost) {
|
||||
public BasicStats(String field, float queryBoost) {
|
||||
this.field = field;
|
||||
this.queryBoost = queryBoost;
|
||||
this.totalBoost = queryBoost;
|
||||
}
|
||||
|
|
|
@ -51,8 +51,8 @@ public abstract class LMSimilarity extends SimilarityBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected BasicStats newStats(float queryBoost) {
|
||||
return new LMStats(queryBoost);
|
||||
protected BasicStats newStats(String field, float queryBoost) {
|
||||
return new LMStats(field, queryBoost);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -102,8 +102,8 @@ public abstract class LMSimilarity extends SimilarityBase {
|
|||
/** The probability that the current term is generated by the collection. */
|
||||
private float collectionProbability;
|
||||
|
||||
public LMStats(float queryBoost) {
|
||||
super(queryBoost);
|
||||
public LMStats(String field, float queryBoost) {
|
||||
super(field, queryBoost);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -46,43 +46,43 @@ public class MultiSimilarity extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
Stats subStats[] = new Stats[sims.length];
|
||||
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
SimWeight subStats[] = new SimWeight[sims.length];
|
||||
for (int i = 0; i < subStats.length; i++) {
|
||||
subStats[i] = sims[i].computeStats(collectionStats, queryBoost, termStats);
|
||||
subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
|
||||
}
|
||||
return new MultiStats(subStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
ExactDocScorer subScorers[] = new ExactDocScorer[sims.length];
|
||||
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
subScorers[i] = sims[i].exactDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
|
||||
subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
|
||||
}
|
||||
return new MultiExactDocScorer(subScorers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
SloppyDocScorer subScorers[] = new SloppyDocScorer[sims.length];
|
||||
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
subScorers[i] = sims[i].sloppyDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
|
||||
subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
|
||||
}
|
||||
return new MultiSloppyDocScorer(subScorers);
|
||||
}
|
||||
|
||||
public static class MultiExactDocScorer extends ExactDocScorer {
|
||||
private final ExactDocScorer subScorers[];
|
||||
public static class MultiExactDocScorer extends ExactSimScorer {
|
||||
private final ExactSimScorer subScorers[];
|
||||
|
||||
MultiExactDocScorer(ExactDocScorer subScorers[]) {
|
||||
MultiExactDocScorer(ExactSimScorer subScorers[]) {
|
||||
this.subScorers = subScorers;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score(int doc, int freq) {
|
||||
float sum = 0.0f;
|
||||
for (ExactDocScorer subScorer : subScorers) {
|
||||
for (ExactSimScorer subScorer : subScorers) {
|
||||
sum += subScorer.score(doc, freq);
|
||||
}
|
||||
return sum;
|
||||
|
@ -91,24 +91,24 @@ public class MultiSimilarity extends Similarity {
|
|||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) {
|
||||
Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
|
||||
for (ExactDocScorer subScorer : subScorers) {
|
||||
for (ExactSimScorer subScorer : subScorers) {
|
||||
expl.addDetail(subScorer.explain(doc, freq));
|
||||
}
|
||||
return expl;
|
||||
}
|
||||
}
|
||||
|
||||
public static class MultiSloppyDocScorer extends SloppyDocScorer {
|
||||
private final SloppyDocScorer subScorers[];
|
||||
public static class MultiSloppyDocScorer extends SloppySimScorer {
|
||||
private final SloppySimScorer subScorers[];
|
||||
|
||||
MultiSloppyDocScorer(SloppyDocScorer subScorers[]) {
|
||||
MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
|
||||
this.subScorers = subScorers;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score(int doc, float freq) {
|
||||
float sum = 0.0f;
|
||||
for (SloppyDocScorer subScorer : subScorers) {
|
||||
for (SloppySimScorer subScorer : subScorers) {
|
||||
sum += subScorer.score(doc, freq);
|
||||
}
|
||||
return sum;
|
||||
|
@ -117,7 +117,7 @@ public class MultiSimilarity extends Similarity {
|
|||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) {
|
||||
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
|
||||
for (SloppyDocScorer subScorer : subScorers) {
|
||||
for (SloppySimScorer subScorer : subScorers) {
|
||||
expl.addDetail(subScorer.explain(doc, freq));
|
||||
}
|
||||
return expl;
|
||||
|
@ -134,17 +134,17 @@ public class MultiSimilarity extends Similarity {
|
|||
}
|
||||
}
|
||||
|
||||
public static class MultiStats extends Stats {
|
||||
final Stats subStats[];
|
||||
public static class MultiStats extends SimWeight {
|
||||
final SimWeight subStats[];
|
||||
|
||||
MultiStats(Stats subStats[]) {
|
||||
MultiStats(SimWeight subStats[]) {
|
||||
this.subStats = subStats;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getValueForNormalization() {
|
||||
float sum = 0.0f;
|
||||
for (Stats stat : subStats) {
|
||||
for (SimWeight stat : subStats) {
|
||||
sum += stat.getValueForNormalization();
|
||||
}
|
||||
return sum / subStats.length;
|
||||
|
@ -152,7 +152,7 @@ public class MultiSimilarity extends Similarity {
|
|||
|
||||
@Override
|
||||
public void normalize(float queryNorm, float topLevelBoost) {
|
||||
for (Stats stat : subStats) {
|
||||
for (SimWeight stat : subStats) {
|
||||
stat.normalize(queryNorm, topLevelBoost);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,16 +17,13 @@ package org.apache.lucene.search.similarities;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.DocValuesField; // javadoc
|
||||
import org.apache.lucene.index.AtomicReader; // javadoc
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader; // javadoc
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.Terms; // javadoc
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
@ -39,7 +36,6 @@ import org.apache.lucene.search.spans.SpanQuery; // javadoc
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat; // javadoc
|
||||
|
||||
|
||||
/**
|
||||
* Similarity defines the components of Lucene scoring.
|
||||
* <p>
|
||||
|
@ -59,21 +55,21 @@ import org.apache.lucene.util.SmallFloat; // javadoc
|
|||
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing
|
||||
* the Similarity implementation to set a per-document value for the field that will
|
||||
* be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption
|
||||
* about what is in this byte, but it is most useful for encoding length normalization
|
||||
* about what is in this norm, but it is most useful for encoding length normalization
|
||||
* information.
|
||||
* <p>
|
||||
* Implementations should carefully consider how the normalization byte is encoded: while
|
||||
* Implementations should carefully consider how the normalization is encoded: while
|
||||
* Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost
|
||||
* and length normalization information with {@link SmallFloat}, this might not be suitable
|
||||
* for all purposes.
|
||||
* and length normalization information with {@link SmallFloat} into a single byte, this
|
||||
* might not be suitable for all purposes.
|
||||
* <p>
|
||||
* Many formulas require the use of average document length, which can be computed via a
|
||||
* combination of {@link Terms#getSumTotalTermFreq()} and {@link IndexReader#maxDoc()},
|
||||
* combination of {@link CollectionStatistics#sumTotalTermFreq()} and
|
||||
* {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()},
|
||||
* depending upon whether the average should reflect field sparsity.
|
||||
* <p>
|
||||
* Because index-time boost is handled entirely at the application level anyway,
|
||||
* an application can alternatively store the index-time boost separately using an
|
||||
* {@link DocValuesField}, and access this at query-time with
|
||||
* {@link AtomicReader#docValues(String)}.
|
||||
* Additional scoring factors can be stored in named {@link DocValuesField}s, and accessed
|
||||
* at query-time with {@link AtomicReader#docValues(String)}.
|
||||
* <p>
|
||||
* Finally, using index-time boosts (either via folding into the normalization byte or
|
||||
* via DocValues) is an inefficient way to boost the scores of different fields if the
|
||||
|
@ -84,19 +80,19 @@ import org.apache.lucene.util.SmallFloat; // javadoc
|
|||
* <a name="querytime"/>
|
||||
* At query-time, Queries interact with the Similarity via these steps:
|
||||
* <ol>
|
||||
* <li>The {@link #computeStats(CollectionStatistics, float, TermStatistics...)} method is called a single time,
|
||||
* <li>The {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} method is called a single time,
|
||||
* allowing the implementation to compute any statistics (such as IDF, average document length, etc)
|
||||
* across <i>the entire collection</i>. The {@link TermStatistics} passed in already contain
|
||||
* the raw statistics involved, so a Similarity can freely use any combination
|
||||
* of term statistics without causing any additional I/O. Lucene makes no assumption about what is
|
||||
* stored in the returned {@link Similarity.Stats} object.
|
||||
* <li>The query normalization process occurs a single time: {@link Similarity.Stats#getValueForNormalization()}
|
||||
* across <i>the entire collection</i>. The {@link TermStatistics} and {@link CollectionStatistics} passed in
|
||||
* already contain all of the raw statistics involved, so a Similarity can freely use any combination
|
||||
* of statistics without causing any additional I/O. Lucene makes no assumption about what is
|
||||
* stored in the returned {@link Similarity.SimWeight} object.
|
||||
* <li>The query normalization process occurs a single time: {@link Similarity.SimWeight#getValueForNormalization()}
|
||||
* is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level
|
||||
* query, and finally {@link Similarity.Stats#normalize(float, float)} passes down the normalization value
|
||||
* query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
|
||||
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
|
||||
* <li>For each segment in the index, the Query creates a {@link #exactDocScorer(Stats, String, AtomicReaderContext)}
|
||||
* <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
|
||||
* (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
|
||||
* {@link #sloppyDocScorer(Stats, String, AtomicReaderContext)} (for queries with sloppy frequencies such as
|
||||
* {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
|
||||
* SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
|
||||
* </ol>
|
||||
* <p>
|
||||
|
@ -130,27 +126,40 @@ public abstract class Similarity {
|
|||
public abstract void computeNorm(FieldInvertState state, Norm norm);
|
||||
|
||||
/**
|
||||
* Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query.
|
||||
* Compute any collection-level weight (e.g. IDF, average document length, etc) needed for scoring a query.
|
||||
*
|
||||
* @param queryBoost the query-time boost.
|
||||
* @param collectionStats collection-level statistics, such as the number of tokens in the collection.
|
||||
* @param termStats term-level statistics, such as the document frequency of a term across the collection.
|
||||
* @return SimWeight object with the information this Similarity needs to score a query.
|
||||
*/
|
||||
public abstract Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats);
|
||||
public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
|
||||
|
||||
/**
|
||||
* returns a new {@link Similarity.ExactDocScorer}.
|
||||
* Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
|
||||
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
|
||||
* @param context segment of the inverted index to be scored.
|
||||
* @return ExactSimScorer for scoring documents across <code>context</code>
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException;
|
||||
public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* returns a new {@link Similarity.SloppyDocScorer}.
|
||||
* Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
|
||||
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
|
||||
* @param context segment of the inverted index to be scored.
|
||||
* @return SloppySimScorer for scoring documents across <code>context</code>
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException;
|
||||
public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* API for scoring exact queries such as {@link TermQuery} and
|
||||
* exact {@link PhraseQuery}.
|
||||
* <p>
|
||||
* Term frequencies are integers (the term or phrase's tf)
|
||||
* Frequencies are integers (the term or phrase frequency within the document)
|
||||
*/
|
||||
public static abstract class ExactDocScorer {
|
||||
public static abstract class ExactSimScorer {
|
||||
/**
|
||||
* Score a single document
|
||||
* @param doc document id
|
||||
|
@ -177,12 +186,14 @@ public abstract class Similarity {
|
|||
* API for scoring "sloppy" queries such as {@link SpanQuery} and
|
||||
* sloppy {@link PhraseQuery}.
|
||||
* <p>
|
||||
* Term frequencies are floating point values.
|
||||
* Frequencies are floating-point values: an approximate
|
||||
* within-document frequency adjusted for "sloppiness" by
|
||||
* {@link SloppySimScorer#computeSlopFactor(int)}.
|
||||
*/
|
||||
public static abstract class SloppyDocScorer {
|
||||
public static abstract class SloppySimScorer {
|
||||
/**
|
||||
* Score a single document
|
||||
* @param doc document id
|
||||
* @param doc document id within the inverted index segment
|
||||
* @param freq sloppy term frequency
|
||||
* @return document's score
|
||||
*/
|
||||
|
@ -196,7 +207,7 @@ public abstract class Similarity {
|
|||
|
||||
/**
|
||||
* Explain the score for a single document
|
||||
* @param doc document id
|
||||
* @param doc document id within the inverted index segment
|
||||
* @param freq Explanation of how the sloppy term frequency was computed
|
||||
* @return Explanation of the document's score
|
||||
*/
|
||||
|
@ -208,12 +219,12 @@ public abstract class Similarity {
|
|||
}
|
||||
}
|
||||
|
||||
/** Stores the statistics for the indexed collection. This abstract
|
||||
/** Stores the weight for a query across the indexed collection. This abstract
|
||||
* implementation is empty; descendants of {@code Similarity} should
|
||||
* subclass {@code Stats} and define the statistics they require in the
|
||||
* subclass {@code SimWeight} and define the statistics they require in the
|
||||
* subclass. Examples include idf, average field length, etc.
|
||||
*/
|
||||
public static abstract class Stats {
|
||||
public static abstract class SimWeight {
|
||||
|
||||
/** The value for normalization of contained query clauses (e.g. sum of squared weights).
|
||||
* <p>
|
||||
|
|
|
@ -70,18 +70,18 @@ public abstract class SimilarityBase extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
BasicStats stats[] = new BasicStats[termStats.length];
|
||||
for (int i = 0; i < termStats.length; i++) {
|
||||
stats[i] = newStats(queryBoost);
|
||||
stats[i] = newStats(collectionStats.field(), queryBoost);
|
||||
fillBasicStats(stats[i], collectionStats, termStats[i]);
|
||||
}
|
||||
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
|
||||
}
|
||||
|
||||
/** Factory method to return a custom stats object */
|
||||
protected BasicStats newStats(float queryBoost) {
|
||||
return new BasicStats(queryBoost);
|
||||
protected BasicStats newStats(String field, float queryBoost) {
|
||||
return new BasicStats(field, queryBoost);
|
||||
}
|
||||
|
||||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||
|
@ -179,40 +179,38 @@ public abstract class SimilarityBase extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
|
||||
AtomicReaderContext context) throws IOException {
|
||||
DocValues norms = context.reader().normValues(fieldName);
|
||||
|
||||
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||
// a multi term query (e.g. phrase). return the summation,
|
||||
// scoring almost as if it were boolean query
|
||||
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||
ExactDocScorer subScorers[] = new ExactDocScorer[subStats.length];
|
||||
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||
ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
subScorers[i] = new BasicExactDocScorer((BasicStats)subStats[i], norms);
|
||||
BasicStats basicstats = (BasicStats) subStats[i];
|
||||
subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||
}
|
||||
return new MultiSimilarity.MultiExactDocScorer(subScorers);
|
||||
} else {
|
||||
return new BasicExactDocScorer((BasicStats) stats, norms);
|
||||
BasicStats basicstats = (BasicStats) stats;
|
||||
return new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
|
||||
AtomicReaderContext context) throws IOException {
|
||||
DocValues norms = context.reader().normValues(fieldName);
|
||||
|
||||
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||
// a multi term query (e.g. phrase). return the summation,
|
||||
// scoring almost as if it were boolean query
|
||||
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||
SloppyDocScorer subScorers[] = new SloppyDocScorer[subStats.length];
|
||||
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||
SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
|
||||
for (int i = 0; i < subScorers.length; i++) {
|
||||
subScorers[i] = new BasicSloppyDocScorer((BasicStats)subStats[i], norms);
|
||||
BasicStats basicstats = (BasicStats) subStats[i];
|
||||
subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||
}
|
||||
return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
|
||||
} else {
|
||||
return new BasicSloppyDocScorer((BasicStats) stats, norms);
|
||||
BasicStats basicstats = (BasicStats) stats;
|
||||
return new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -274,7 +272,7 @@ public abstract class SimilarityBase extends Similarity {
|
|||
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
||||
* respectively.
|
||||
*/
|
||||
private class BasicExactDocScorer extends ExactDocScorer {
|
||||
private class BasicExactDocScorer extends ExactSimScorer {
|
||||
private final BasicStats stats;
|
||||
private final byte[] norms;
|
||||
|
||||
|
@ -303,7 +301,7 @@ public abstract class SimilarityBase extends Similarity {
|
|||
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
||||
* respectively.
|
||||
*/
|
||||
private class BasicSloppyDocScorer extends SloppyDocScorer {
|
||||
private class BasicSloppyDocScorer extends SloppySimScorer {
|
||||
private final BasicStats stats;
|
||||
private final byte[] norms;
|
||||
|
||||
|
|
|
@ -694,26 +694,28 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
public abstract float scorePayload(int doc, int start, int end, BytesRef payload);
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
final Explanation idf = termStats.length == 1
|
||||
? idfExplain(collectionStats, termStats[0])
|
||||
: idfExplain(collectionStats, termStats);
|
||||
return new IDFStats(idf, queryBoost);
|
||||
return new IDFStats(collectionStats.field(), idf, queryBoost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
return new ExactTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName));
|
||||
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
IDFStats idfstats = (IDFStats) stats;
|
||||
return new ExactTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName));
|
||||
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
IDFStats idfstats = (IDFStats) stats;
|
||||
return new SloppyTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
|
||||
}
|
||||
|
||||
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
|
||||
|
||||
private final class ExactTFIDFDocScorer extends ExactDocScorer {
|
||||
private final class ExactTFIDFDocScorer extends ExactSimScorer {
|
||||
private final IDFStats stats;
|
||||
private final float weightValue;
|
||||
private final byte[] norms;
|
||||
|
@ -744,7 +746,7 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
}
|
||||
}
|
||||
|
||||
private final class SloppyTFIDFDocScorer extends SloppyDocScorer {
|
||||
private final class SloppyTFIDFDocScorer extends SloppySimScorer {
|
||||
private final IDFStats stats;
|
||||
private final float weightValue;
|
||||
private final byte[] norms;
|
||||
|
@ -780,7 +782,8 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
|
||||
/** Collection statistics for the TF-IDF model. The only statistic of interest
|
||||
* to this model is idf. */
|
||||
private static class IDFStats extends Stats {
|
||||
private static class IDFStats extends SimWeight {
|
||||
private final String field;
|
||||
/** The idf and its explanation */
|
||||
private final Explanation idf;
|
||||
private float queryNorm;
|
||||
|
@ -788,8 +791,9 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
private final float queryBoost;
|
||||
private float value;
|
||||
|
||||
public IDFStats(Explanation idf, float queryBoost) {
|
||||
public IDFStats(String field, Explanation idf, float queryBoost) {
|
||||
// TODO: Validate?
|
||||
this.field = field;
|
||||
this.idf = idf;
|
||||
this.queryBoost = queryBoost;
|
||||
this.queryWeight = idf.getValue() * queryBoost; // compute query weight
|
||||
|
|
|
@ -33,9 +33,9 @@ public class SpanScorer extends Scorer {
|
|||
|
||||
protected int doc;
|
||||
protected float freq;
|
||||
protected final Similarity.SloppyDocScorer docScorer;
|
||||
protected final Similarity.SloppySimScorer docScorer;
|
||||
|
||||
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppyDocScorer docScorer)
|
||||
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
|
||||
throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.index.IndexReaderContext;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
|
@ -38,7 +38,7 @@ public class SpanWeight extends Weight {
|
|||
protected Similarity similarity;
|
||||
protected Map<Term,TermContext> termContexts;
|
||||
protected SpanQuery query;
|
||||
protected Similarity.Stats stats;
|
||||
protected Similarity.SimWeight stats;
|
||||
|
||||
public SpanWeight(SpanQuery query, IndexSearcher searcher)
|
||||
throws IOException {
|
||||
|
@ -57,9 +57,8 @@ public class SpanWeight extends Weight {
|
|||
termContexts.put(term, state);
|
||||
i++;
|
||||
}
|
||||
stats = similarity.computeStats(
|
||||
stats = similarity.computeWeight(query.getBoost(),
|
||||
searcher.collectionStatistics(query.getField()),
|
||||
query.getBoost(),
|
||||
termStats);
|
||||
}
|
||||
|
||||
|
@ -79,7 +78,7 @@ public class SpanWeight extends Weight {
|
|||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -89,7 +88,7 @@ public class SpanWeight extends Weight {
|
|||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = scorer.freq();
|
||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||
|
|
|
@ -191,7 +191,7 @@ final class JustCompileSearch {
|
|||
static final class JustCompilePhraseScorer extends PhraseScorer {
|
||||
|
||||
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.SloppyDocScorer docScorer) throws IOException {
|
||||
Similarity.SloppySimScorer docScorer) throws IOException {
|
||||
super(weight, postings, docScorer);
|
||||
}
|
||||
|
||||
|
@ -247,17 +247,17 @@ final class JustCompileSearch {
|
|||
static final class JustCompileSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
|
|
@ -158,16 +158,16 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
return sim.computeStats(collectionStats, queryBoost, termStats);
|
||||
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return sim.computeWeight(queryBoost, collectionStats, termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
final ExactDocScorer sub = sim.exactDocScorer(stats, fieldName, context);
|
||||
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
final ExactSimScorer sub = sim.exactSimScorer(stats, context);
|
||||
final Source values = context.reader().docValues(boostField).getSource();
|
||||
|
||||
return new ExactDocScorer() {
|
||||
return new ExactSimScorer() {
|
||||
@Override
|
||||
public float score(int doc, int freq) {
|
||||
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
||||
|
@ -186,11 +186,11 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
||||
final SloppyDocScorer sub = sim.sloppyDocScorer(stats, fieldName, context);
|
||||
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||
final SloppySimScorer sub = sim.sloppySimScorer(stats, context);
|
||||
final Source values = context.reader().docValues(boostField).getSource();
|
||||
|
||||
return new SloppyDocScorer() {
|
||||
return new SloppySimScorer() {
|
||||
@Override
|
||||
public float score(int doc, float freq) {
|
||||
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
||||
|
|
|
@ -23,11 +23,9 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.OrdTermState;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
|
@ -40,8 +38,6 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and
|
||||
|
@ -167,7 +163,7 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
|
||||
/** Creates the default statistics object that the specific tests modify. */
|
||||
private BasicStats createStats() {
|
||||
BasicStats stats = new BasicStats(1);
|
||||
BasicStats stats = new BasicStats("spoof", 1);
|
||||
stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS);
|
||||
stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS);
|
||||
stats.setAvgFieldLength(AVG_FIELD_LENGTH);
|
||||
|
@ -177,7 +173,7 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
||||
return new CollectionStatistics("spoof", stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
||||
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
||||
}
|
||||
|
||||
private TermStatistics toTermStats(BasicStats stats) {
|
||||
|
@ -192,8 +188,8 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
private void unitTestCore(BasicStats stats, float freq, int docLen)
|
||||
throws IOException {
|
||||
for (SimilarityBase sim : sims) {
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
||||
stats.getTotalBoost(),
|
||||
BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
|
||||
toCollectionStats(stats),
|
||||
toTermStats(stats));
|
||||
float score = sim.score(realStats, freq, docLen);
|
||||
float explScore = sim.explain(
|
||||
|
@ -525,8 +521,8 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
private void correctnessTestCore(SimilarityBase sim, float gold)
|
||||
throws IOException {
|
||||
BasicStats stats = createStats();
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
||||
stats.getTotalBoost(),
|
||||
BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
|
||||
toCollectionStats(stats),
|
||||
toTermStats(stats));
|
||||
float score = sim.score(realStats, FREQ, DOC_LEN);
|
||||
assertEquals(
|
||||
|
|
|
@ -139,7 +139,7 @@ final class JustCompileSearchSpans {
|
|||
static final class JustCompileSpanScorer extends SpanScorer {
|
||||
|
||||
protected JustCompileSpanScorer(Spans spans, Weight weight,
|
||||
Similarity.SloppyDocScorer docScorer) throws IOException {
|
||||
Similarity.SloppySimScorer docScorer) throws IOException {
|
||||
super(spans, weight, docScorer);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue