LUCENE-3749: Similarity.java javadocs and simplifications for 4.0

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1239941 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-02-03 00:01:19 +00:00
parent d80355fd21
commit 8fbd9d7673
25 changed files with 207 additions and 191 deletions

View File

@ -28,7 +28,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs; import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
import org.apache.lucene.search.TermQuery.TermWeight; import org.apache.lucene.search.TermQuery.TermWeight;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
@ -362,7 +362,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
if (termsEnum == null) { if (termsEnum == null) {
return null; return null;
} }
final ExactDocScorer docScorer = weight.createDocScorer(context); final ExactSimScorer docScorer = weight.createDocScorer(context);
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true); final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
if (docsAndFreqsEnum == null) { if (docsAndFreqsEnum == null) {
// TODO: we could carry over TermState from the // TODO: we could carry over TermState from the
@ -394,7 +394,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
if (termsEnum == null) { if (termsEnum == null) {
return null; return null;
} }
final ExactDocScorer docScorer = weight.createDocScorer(context); final ExactSimScorer docScorer = weight.createDocScorer(context);
docsAndFreqs[i] = new DocsAndFreqs(null, docsAndFreqs[i] = new DocsAndFreqs(null,
termsEnum.docs(acceptDocs, null, false), termsEnum.docs(acceptDocs, null, false),
termsEnum.docFreq(), docScorer); termsEnum.docFreq(), docScorer);

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
/** Scorer for conjunctions, sets of terms, all of which are required. */ /** Scorer for conjunctions, sets of terms, all of which are required. */
@ -100,10 +100,10 @@ class ConjunctionTermScorer extends Scorer {
final DocsEnum docsAndFreqs; final DocsEnum docsAndFreqs;
final DocsEnum docs; final DocsEnum docs;
final int docFreq; final int docFreq;
final ExactDocScorer docScorer; final ExactSimScorer docScorer;
int doc = -1; int doc = -1;
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) { DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactSimScorer docScorer) {
this.docsAndFreqs = docsAndFreqs; this.docsAndFreqs = docsAndFreqs;
this.docs = docs; this.docs = docs;
this.docFreq = docFreq; this.docFreq = docFreq;

View File

@ -55,10 +55,10 @@ final class ExactPhraseScorer extends Scorer {
private int docID = -1; private int docID = -1;
private int freq; private int freq;
private final Similarity.ExactDocScorer docScorer; private final Similarity.ExactSimScorer docScorer;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.ExactDocScorer docScorer) throws IOException { Similarity.ExactSimScorer docScorer) throws IOException {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;

View File

@ -29,7 +29,7 @@ import org.apache.lucene.search.similarities.Similarity;
final class MatchOnlyTermScorer extends Scorer { final class MatchOnlyTermScorer extends Scorer {
private final DocsEnum docsEnum; private final DocsEnum docsEnum;
private final Similarity.ExactDocScorer docScorer; private final Similarity.ExactSimScorer docScorer;
/** /**
* Construct a <code>TermScorer</code>. * Construct a <code>TermScorer</code>.
@ -39,10 +39,10 @@ final class MatchOnlyTermScorer extends Scorer {
* @param td * @param td
* An iterator over the documents matching the <code>Term</code>. * An iterator over the documents matching the <code>Term</code>.
* @param docScorer * @param docScorer
* The <code>Similarity.ExactDocScorer</code> implementation * The <code>Similarity.ExactSimScorer</code> implementation
* to be used for score computations. * to be used for score computations.
*/ */
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;
this.docsEnum = td; this.docsEnum = td;

View File

@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
@ -137,7 +137,7 @@ public class MultiPhraseQuery extends Query {
private class MultiPhraseWeight extends Weight { private class MultiPhraseWeight extends Weight {
private final Similarity similarity; private final Similarity similarity;
private final Similarity.Stats stats; private final Similarity.SimWeight stats;
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>(); private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
public MultiPhraseWeight(IndexSearcher searcher) public MultiPhraseWeight(IndexSearcher searcher)
@ -157,8 +157,9 @@ public class MultiPhraseQuery extends Query {
allTermStats.add(searcher.termStatistics(term, termContext)); allTermStats.add(searcher.termStatistics(term, termContext));
} }
} }
stats = similarity.computeStats(searcher.collectionStatistics(field), stats = similarity.computeWeight(getBoost(),
getBoost(), allTermStats.toArray(new TermStatistics[allTermStats.size()])); searcher.collectionStatistics(field),
allTermStats.toArray(new TermStatistics[allTermStats.size()]));
} }
@Override @Override
@ -246,14 +247,14 @@ public class MultiPhraseQuery extends Query {
} }
if (slop == 0) { if (slop == 0) {
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context)); ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
if (s.noDocs) { if (s.noDocs) {
return null; return null;
} else { } else {
return s; return s;
} }
} else { } else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context)); return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
} }
} }
@ -264,7 +265,7 @@ public class MultiPhraseQuery extends Query {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
@ -183,7 +183,7 @@ public class PhraseQuery extends Query {
private class PhraseWeight extends Weight { private class PhraseWeight extends Weight {
private final Similarity similarity; private final Similarity similarity;
private final Similarity.Stats stats; private final Similarity.SimWeight stats;
private transient TermContext states[]; private transient TermContext states[];
public PhraseWeight(IndexSearcher searcher) public PhraseWeight(IndexSearcher searcher)
@ -197,7 +197,7 @@ public class PhraseQuery extends Query {
states[i] = TermContext.build(context, term, true); states[i] = TermContext.build(context, term, true);
termStats[i] = searcher.termStatistics(term, states[i]); termStats[i] = searcher.termStatistics(term, states[i]);
} }
stats = similarity.computeStats(searcher.collectionStatistics(field), getBoost(), termStats); stats = similarity.computeWeight(getBoost(), searcher.collectionStatistics(field), termStats);
} }
@Override @Override
@ -258,7 +258,7 @@ public class PhraseQuery extends Query {
} }
if (slop == 0) { // optimize exact case if (slop == 0) { // optimize exact case
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context)); ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
if (s.noDocs) { if (s.noDocs) {
return null; return null;
} else { } else {
@ -266,7 +266,7 @@ public class PhraseQuery extends Query {
} }
} else { } else {
return return
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context)); new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
} }
} }
@ -282,7 +282,7 @@ public class PhraseQuery extends Query {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -36,10 +36,10 @@ abstract class PhraseScorer extends Scorer {
private float freq; //phrase frequency in current doc as computed by phraseFreq(). private float freq; //phrase frequency in current doc as computed by phraseFreq().
final Similarity.SloppyDocScorer docScorer; final Similarity.SloppySimScorer docScorer;
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SloppyDocScorer docScorer) { Similarity.SloppySimScorer docScorer) {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;

View File

@ -30,7 +30,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppyDocScorer docScorer) { int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer); super(weight, postings, docScorer);
this.slop = slop; this.slop = slop;
} }

View File

@ -27,7 +27,7 @@ import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -45,7 +45,7 @@ public class TermQuery extends Query {
final class TermWeight extends Weight { final class TermWeight extends Weight {
private final Similarity similarity; private final Similarity similarity;
private final Similarity.Stats stats; private final Similarity.SimWeight stats;
private final TermContext termStates; private final TermContext termStates;
public TermWeight(IndexSearcher searcher, TermContext termStates) public TermWeight(IndexSearcher searcher, TermContext termStates)
@ -53,9 +53,9 @@ public class TermQuery extends Query {
assert termStates != null : "TermContext must not be null"; assert termStates != null : "TermContext must not be null";
this.termStates = termStates; this.termStates = termStates;
this.similarity = searcher.getSimilarityProvider().get(term.field()); this.similarity = searcher.getSimilarityProvider().get(term.field());
this.stats = similarity.computeStats( this.stats = similarity.computeWeight(
searcher.collectionStatistics(term.field()),
getBoost(), getBoost(),
searcher.collectionStatistics(term.field()),
searcher.termStatistics(term, termStates)); searcher.termStatistics(term, termStates));
} }
@ -95,10 +95,10 @@ public class TermQuery extends Query {
} }
/** /**
* Creates an {@link ExactDocScorer} for this {@link TermWeight}*/ * Creates an {@link ExactSimScorer} for this {@link TermWeight}*/
ExactDocScorer createDocScorer(AtomicReaderContext context) ExactSimScorer createDocScorer(AtomicReaderContext context)
throws IOException { throws IOException {
return similarity.exactDocScorer(stats, term.field(), context); return similarity.exactSimScorer(stats, context);
} }
/** /**
@ -130,7 +130,7 @@ public class TermQuery extends Query {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context); ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));

View File

@ -26,7 +26,7 @@ import org.apache.lucene.search.similarities.Similarity;
*/ */
final class TermScorer extends Scorer { final class TermScorer extends Scorer {
private final DocsEnum docsEnum; private final DocsEnum docsEnum;
private final Similarity.ExactDocScorer docScorer; private final Similarity.ExactSimScorer docScorer;
/** /**
* Construct a <code>TermScorer</code>. * Construct a <code>TermScorer</code>.
@ -36,10 +36,10 @@ final class TermScorer extends Scorer {
* @param td * @param td
* An iterator over the documents matching the <code>Term</code>. * An iterator over the documents matching the <code>Term</code>.
* @param docScorer * @param docScorer
* The <code>Similarity.ExactDocScorer</code> implementation * The <code>Similarity.ExactSimScorer</code> implementation
* to be used for score computations. * to be used for score computations.
*/ */
TermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;
this.docsEnum = td; this.docsEnum = td;

View File

@ -25,7 +25,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.spans.NearSpansOrdered; import org.apache.lucene.search.spans.NearSpansOrdered;
import org.apache.lucene.search.spans.NearSpansUnordered; import org.apache.lucene.search.spans.NearSpansUnordered;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
@ -53,7 +53,7 @@ import java.util.Iterator;
* <p/> * <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}. * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
* *
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef) * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
*/ */
public class PayloadNearQuery extends SpanNearQuery { public class PayloadNearQuery extends SpanNearQuery {
protected String fieldName; protected String fieldName;
@ -151,7 +151,7 @@ public class PayloadNearQuery extends SpanNearQuery {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.sloppyDocScorer(stats, query.getField(), context)); similarity, similarity.sloppySimScorer(stats, context));
} }
@Override @Override
@ -161,7 +161,7 @@ public class PayloadNearQuery extends SpanNearQuery {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation(); Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
@ -189,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
private int payloadsSeen; private int payloadsSeen;
protected PayloadNearSpanScorer(Spans spans, Weight weight, protected PayloadNearSpanScorer(Spans spans, Weight weight,
Similarity similarity, Similarity.SloppyDocScorer docScorer) throws IOException { Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException {
super(spans, weight, docScorer); super(spans, weight, docScorer);
this.spans = spans; this.spans = spans;
} }

View File

@ -28,7 +28,7 @@ import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer; import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.spans.TermSpans; import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.SpanWeight;
@ -49,7 +49,7 @@ import java.io.IOException;
* which returns 1 by default. * which returns 1 by default.
* <p/> * <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}. * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef) * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
**/ **/
public class PayloadTermQuery extends SpanTermQuery { public class PayloadTermQuery extends SpanTermQuery {
protected PayloadFunction function; protected PayloadFunction function;
@ -82,7 +82,7 @@ public class PayloadTermQuery extends SpanTermQuery {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.sloppyDocScorer(stats, query.getField(), context)); this, similarity.sloppySimScorer(stats, context));
} }
protected class PayloadTermSpanScorer extends SpanScorer { protected class PayloadTermSpanScorer extends SpanScorer {
@ -91,7 +91,7 @@ public class PayloadTermQuery extends SpanTermQuery {
protected int payloadsSeen; protected int payloadsSeen;
private final TermSpans termSpans; private final TermSpans termSpans;
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppyDocScorer docScorer) throws IOException { public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
super(spans, weight, docScorer); super(spans, weight, docScorer);
termSpans = spans; termSpans = spans;
} }
@ -180,7 +180,7 @@ public class PayloadTermQuery extends SpanTermQuery {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation(); Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -153,7 +153,7 @@ public class BM25Similarity extends Similarity {
} }
@Override @Override
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats); float avgdl = avgFieldLength(collectionStats);
@ -163,23 +163,25 @@ public class BM25Similarity extends Similarity {
for (int i = 0; i < cache.length; i++) { for (int i = 0; i < cache.length; i++) {
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl); cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
} }
return new BM25Stats(idf, queryBoost, avgdl, cache); return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
} }
@Override @Override
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final DocValues norms = context.reader().normValues(fieldName); BM25Stats bm25stats = (BM25Stats) stats;
final DocValues norms = context.reader().normValues(bm25stats.field);
return norms == null return norms == null
? new ExactBM25DocScorerNoNorms((BM25Stats)stats) ? new ExactBM25DocScorerNoNorms(bm25stats)
: new ExactBM25DocScorer((BM25Stats)stats, norms); : new ExactBM25DocScorer(bm25stats, norms);
} }
@Override @Override
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
return new SloppyBM25DocScorer((BM25Stats) stats, context.reader().normValues(fieldName)); BM25Stats bm25stats = (BM25Stats) stats;
return new SloppyBM25DocScorer(bm25stats, context.reader().normValues(bm25stats.field));
} }
private class ExactBM25DocScorer extends ExactDocScorer { private class ExactBM25DocScorer extends ExactSimScorer {
private final BM25Stats stats; private final BM25Stats stats;
private final float weightValue; private final float weightValue;
private final byte[] norms; private final byte[] norms;
@ -205,7 +207,7 @@ public class BM25Similarity extends Similarity {
} }
/** there are no norms, we act as if b=0 */ /** there are no norms, we act as if b=0 */
private class ExactBM25DocScorerNoNorms extends ExactDocScorer { private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
private final BM25Stats stats; private final BM25Stats stats;
private final float weightValue; private final float weightValue;
private static final int SCORE_CACHE_SIZE = 32; private static final int SCORE_CACHE_SIZE = 32;
@ -232,7 +234,7 @@ public class BM25Similarity extends Similarity {
} }
} }
private class SloppyBM25DocScorer extends SloppyDocScorer { private class SloppyBM25DocScorer extends SloppySimScorer {
private final BM25Stats stats; private final BM25Stats stats;
private final float weightValue; // boost * idf * (k1 + 1) private final float weightValue; // boost * idf * (k1 + 1)
private final byte[] norms; private final byte[] norms;
@ -269,7 +271,7 @@ public class BM25Similarity extends Similarity {
} }
/** Collection statistics for the BM25 model. */ /** Collection statistics for the BM25 model. */
private static class BM25Stats extends Stats { private static class BM25Stats extends SimWeight {
/** BM25's idf */ /** BM25's idf */
private final Explanation idf; private final Explanation idf;
/** The average document length. */ /** The average document length. */
@ -280,10 +282,13 @@ public class BM25Similarity extends Similarity {
private float topLevelBoost; private float topLevelBoost;
/** weight (idf * boost) */ /** weight (idf * boost) */
private float weight; private float weight;
/** field name, for pulling norms */
private final String field;
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */ /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
private final float cache[]; private final float cache[];
BM25Stats(Explanation idf, float queryBoost, float avgdl, float cache[]) { BM25Stats(String field, Explanation idf, float queryBoost, float avgdl, float cache[]) {
this.field = field;
this.idf = idf; this.idf = idf;
this.queryBoost = queryBoost; this.queryBoost = queryBoost;
this.avgdl = avgdl; this.avgdl = avgdl;

View File

@ -23,7 +23,8 @@ import org.apache.lucene.index.Terms;
* Stores all statistics commonly used by ranking methods. * Stores all statistics commonly used by ranking methods.
* @lucene.experimental * @lucene.experimental
*/ */
public class BasicStats extends Similarity.Stats { public class BasicStats extends Similarity.SimWeight {
final String field;
/** The number of documents. */ /** The number of documents. */
protected long numberOfDocuments; protected long numberOfDocuments;
/** The total number of tokens in the field. */ /** The total number of tokens in the field. */
@ -47,7 +48,8 @@ public class BasicStats extends Similarity.Stats {
protected float totalBoost; protected float totalBoost;
/** Constructor. Sets the query boost. */ /** Constructor. Sets the query boost. */
public BasicStats(float queryBoost) { public BasicStats(String field, float queryBoost) {
this.field = field;
this.queryBoost = queryBoost; this.queryBoost = queryBoost;
this.totalBoost = queryBoost; this.totalBoost = queryBoost;
} }

View File

@ -51,8 +51,8 @@ public abstract class LMSimilarity extends SimilarityBase {
} }
@Override @Override
protected BasicStats newStats(float queryBoost) { protected BasicStats newStats(String field, float queryBoost) {
return new LMStats(queryBoost); return new LMStats(field, queryBoost);
} }
/** /**
@ -102,8 +102,8 @@ public abstract class LMSimilarity extends SimilarityBase {
/** The probability that the current term is generated by the collection. */ /** The probability that the current term is generated by the collection. */
private float collectionProbability; private float collectionProbability;
public LMStats(float queryBoost) { public LMStats(String field, float queryBoost) {
super(queryBoost); super(field, queryBoost);
} }
/** /**

View File

@ -46,43 +46,43 @@ public class MultiSimilarity extends Similarity {
} }
@Override @Override
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
Stats subStats[] = new Stats[sims.length]; SimWeight subStats[] = new SimWeight[sims.length];
for (int i = 0; i < subStats.length; i++) { for (int i = 0; i < subStats.length; i++) {
subStats[i] = sims[i].computeStats(collectionStats, queryBoost, termStats); subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
} }
return new MultiStats(subStats); return new MultiStats(subStats);
} }
@Override @Override
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
ExactDocScorer subScorers[] = new ExactDocScorer[sims.length]; ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) { for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].exactDocScorer(((MultiStats)stats).subStats[i], fieldName, context); subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
} }
return new MultiExactDocScorer(subScorers); return new MultiExactDocScorer(subScorers);
} }
@Override @Override
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
SloppyDocScorer subScorers[] = new SloppyDocScorer[sims.length]; SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) { for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].sloppyDocScorer(((MultiStats)stats).subStats[i], fieldName, context); subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
} }
return new MultiSloppyDocScorer(subScorers); return new MultiSloppyDocScorer(subScorers);
} }
public static class MultiExactDocScorer extends ExactDocScorer { public static class MultiExactDocScorer extends ExactSimScorer {
private final ExactDocScorer subScorers[]; private final ExactSimScorer subScorers[];
MultiExactDocScorer(ExactDocScorer subScorers[]) { MultiExactDocScorer(ExactSimScorer subScorers[]) {
this.subScorers = subScorers; this.subScorers = subScorers;
} }
@Override @Override
public float score(int doc, int freq) { public float score(int doc, int freq) {
float sum = 0.0f; float sum = 0.0f;
for (ExactDocScorer subScorer : subScorers) { for (ExactSimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq); sum += subScorer.score(doc, freq);
} }
return sum; return sum;
@ -91,24 +91,24 @@ public class MultiSimilarity extends Similarity {
@Override @Override
public Explanation explain(int doc, Explanation freq) { public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:"); Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
for (ExactDocScorer subScorer : subScorers) { for (ExactSimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq)); expl.addDetail(subScorer.explain(doc, freq));
} }
return expl; return expl;
} }
} }
public static class MultiSloppyDocScorer extends SloppyDocScorer { public static class MultiSloppyDocScorer extends SloppySimScorer {
private final SloppyDocScorer subScorers[]; private final SloppySimScorer subScorers[];
MultiSloppyDocScorer(SloppyDocScorer subScorers[]) { MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
this.subScorers = subScorers; this.subScorers = subScorers;
} }
@Override @Override
public float score(int doc, float freq) { public float score(int doc, float freq) {
float sum = 0.0f; float sum = 0.0f;
for (SloppyDocScorer subScorer : subScorers) { for (SloppySimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq); sum += subScorer.score(doc, freq);
} }
return sum; return sum;
@ -117,7 +117,7 @@ public class MultiSimilarity extends Similarity {
@Override @Override
public Explanation explain(int doc, Explanation freq) { public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:"); Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
for (SloppyDocScorer subScorer : subScorers) { for (SloppySimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq)); expl.addDetail(subScorer.explain(doc, freq));
} }
return expl; return expl;
@ -134,17 +134,17 @@ public class MultiSimilarity extends Similarity {
} }
} }
public static class MultiStats extends Stats { public static class MultiStats extends SimWeight {
final Stats subStats[]; final SimWeight subStats[];
MultiStats(Stats subStats[]) { MultiStats(SimWeight subStats[]) {
this.subStats = subStats; this.subStats = subStats;
} }
@Override @Override
public float getValueForNormalization() { public float getValueForNormalization() {
float sum = 0.0f; float sum = 0.0f;
for (Stats stat : subStats) { for (SimWeight stat : subStats) {
sum += stat.getValueForNormalization(); sum += stat.getValueForNormalization();
} }
return sum / subStats.length; return sum / subStats.length;
@ -152,7 +152,7 @@ public class MultiSimilarity extends Similarity {
@Override @Override
public void normalize(float queryNorm, float topLevelBoost) { public void normalize(float queryNorm, float topLevelBoost) {
for (Stats stat : subStats) { for (SimWeight stat : subStats) {
stat.normalize(queryNorm, topLevelBoost); stat.normalize(queryNorm, topLevelBoost);
} }
} }

View File

@ -17,16 +17,13 @@ package org.apache.lucene.search.similarities;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.document.DocValuesField; // javadoc import org.apache.lucene.document.DocValuesField; // javadoc
import org.apache.lucene.index.AtomicReader; // javadoc import org.apache.lucene.index.AtomicReader; // javadoc
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader; // javadoc
import org.apache.lucene.index.Norm; import org.apache.lucene.index.Norm;
import org.apache.lucene.index.Terms; // javadoc
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
@ -39,7 +36,6 @@ import org.apache.lucene.search.spans.SpanQuery; // javadoc
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat; // javadoc import org.apache.lucene.util.SmallFloat; // javadoc
/** /**
* Similarity defines the components of Lucene scoring. * Similarity defines the components of Lucene scoring.
* <p> * <p>
@ -59,21 +55,21 @@ import org.apache.lucene.util.SmallFloat; // javadoc
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing * At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing
* the Similarity implementation to set a per-document value for the field that will * the Similarity implementation to set a per-document value for the field that will
* be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption * be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption
* about what is in this byte, but it is most useful for encoding length normalization * about what is in this norm, but it is most useful for encoding length normalization
* information. * information.
* <p> * <p>
* Implementations should carefully consider how the normalization byte is encoded: while * Implementations should carefully consider how the normalization is encoded: while
* Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost * Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost
* and length normalization information with {@link SmallFloat}, this might not be suitable * and length normalization information with {@link SmallFloat} into a single byte, this
* for all purposes. * might not be suitable for all purposes.
* <p> * <p>
* Many formulas require the use of average document length, which can be computed via a * Many formulas require the use of average document length, which can be computed via a
* combination of {@link Terms#getSumTotalTermFreq()} and {@link IndexReader#maxDoc()}, * combination of {@link CollectionStatistics#sumTotalTermFreq()} and
* {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()},
* depending upon whether the average should reflect field sparsity.
* <p> * <p>
* Because index-time boost is handled entirely at the application level anyway, * Additional scoring factors can be stored in named {@link DocValuesField}s, and accessed
* an application can alternatively store the index-time boost separately using an * at query-time with {@link AtomicReader#docValues(String)}.
* {@link DocValuesField}, and access this at query-time with
* {@link AtomicReader#docValues(String)}.
* <p> * <p>
* Finally, using index-time boosts (either via folding into the normalization byte or * Finally, using index-time boosts (either via folding into the normalization byte or
* via DocValues) is an inefficient way to boost the scores of different fields if the * via DocValues) is an inefficient way to boost the scores of different fields if the
@ -84,19 +80,19 @@ import org.apache.lucene.util.SmallFloat; // javadoc
* <a name="querytime"/> * <a name="querytime"/>
* At query-time, Queries interact with the Similarity via these steps: * At query-time, Queries interact with the Similarity via these steps:
* <ol> * <ol>
* <li>The {@link #computeStats(CollectionStatistics, float, TermStatistics...)} method is called a single time, * <li>The {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} method is called a single time,
* allowing the implementation to compute any statistics (such as IDF, average document length, etc) * allowing the implementation to compute any statistics (such as IDF, average document length, etc)
* across <i>the entire collection</i>. The {@link TermStatistics} passed in already contain * across <i>the entire collection</i>. The {@link TermStatistics} and {@link CollectionStatistics} passed in
* the raw statistics involved, so a Similarity can freely use any combination * already contain all of the raw statistics involved, so a Similarity can freely use any combination
* of term statistics without causing any additional I/O. Lucene makes no assumption about what is * of statistics without causing any additional I/O. Lucene makes no assumption about what is
* stored in the returned {@link Similarity.Stats} object. * stored in the returned {@link Similarity.SimWeight} object.
* <li>The query normalization process occurs a single time: {@link Similarity.Stats#getValueForNormalization()} * <li>The query normalization process occurs a single time: {@link Similarity.SimWeight#getValueForNormalization()}
* is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level * is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level
* query, and finally {@link Similarity.Stats#normalize(float, float)} passes down the normalization value * query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s). * and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
* <li>For each segment in the index, the Query creates a {@link #exactDocScorer(Stats, String, AtomicReaderContext)} * <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
* (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a * (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
* {@link #sloppyDocScorer(Stats, String, AtomicReaderContext)} (for queries with sloppy frequencies such as * {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
* SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document. * SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
* </ol> * </ol>
* <p> * <p>
@ -130,27 +126,40 @@ public abstract class Similarity {
public abstract void computeNorm(FieldInvertState state, Norm norm); public abstract void computeNorm(FieldInvertState state, Norm norm);
/** /**
* Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query. * Compute any collection-level weight (e.g. IDF, average document length, etc) needed for scoring a query.
*
* @param queryBoost the query-time boost.
* @param collectionStats collection-level statistics, such as the number of tokens in the collection.
* @param termStats term-level statistics, such as the document frequency of a term across the collection.
* @return SimWeight object with the information this Similarity needs to score a query.
*/ */
public abstract Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats); public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
/** /**
* returns a new {@link Similarity.ExactDocScorer}. * Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
* @return ExactSimScorer for scoring documents across <code>context</code>
* @throws IOException
*/ */
public abstract ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException; public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
/** /**
* returns a new {@link Similarity.SloppyDocScorer}. * Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
* @return SloppySimScorer for scoring documents across <code>context</code>
* @throws IOException
*/ */
public abstract SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException; public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
/** /**
* API for scoring exact queries such as {@link TermQuery} and * API for scoring exact queries such as {@link TermQuery} and
* exact {@link PhraseQuery}. * exact {@link PhraseQuery}.
* <p> * <p>
* Term frequencies are integers (the term or phrase's tf) * Frequencies are integers (the term or phrase frequency within the document)
*/ */
public static abstract class ExactDocScorer { public static abstract class ExactSimScorer {
/** /**
* Score a single document * Score a single document
* @param doc document id * @param doc document id
@ -177,12 +186,14 @@ public abstract class Similarity {
* API for scoring "sloppy" queries such as {@link SpanQuery} and * API for scoring "sloppy" queries such as {@link SpanQuery} and
* sloppy {@link PhraseQuery}. * sloppy {@link PhraseQuery}.
* <p> * <p>
* Term frequencies are floating point values. * Frequencies are floating-point values: an approximate
* within-document frequency adjusted for "sloppiness" by
* {@link SloppySimScorer#computeSlopFactor(int)}.
*/ */
public static abstract class SloppyDocScorer { public static abstract class SloppySimScorer {
/** /**
* Score a single document * Score a single document
* @param doc document id * @param doc document id within the inverted index segment
* @param freq sloppy term frequency * @param freq sloppy term frequency
* @return document's score * @return document's score
*/ */
@ -196,7 +207,7 @@ public abstract class Similarity {
/** /**
* Explain the score for a single document * Explain the score for a single document
* @param doc document id * @param doc document id within the inverted index segment
* @param freq Explanation of how the sloppy term frequency was computed * @param freq Explanation of how the sloppy term frequency was computed
* @return explanation of the document's score * @return explanation of the document's score
*/ */
@ -208,12 +219,12 @@ public abstract class Similarity {
} }
} }
/** Stores the statistics for the indexed collection. This abstract /** Stores the weight for a query across the indexed collection. This abstract
* implementation is empty; descendants of {@code Similarity} should * implementation is empty; descendants of {@code Similarity} should
* subclass {@code Stats} and define the statistics they require in the * subclass {@code SimWeight} and define the statistics they require in the
* subclass. Examples include idf, average field length, etc. * subclass. Examples include idf, average field length, etc.
*/ */
public static abstract class Stats { public static abstract class SimWeight {
/** The value for normalization of contained query clauses (e.g. sum of squared weights). /** The value for normalization of contained query clauses (e.g. sum of squared weights).
* <p> * <p>

View File

@ -70,18 +70,18 @@ public abstract class SimilarityBase extends Similarity {
} }
@Override @Override
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
BasicStats stats[] = new BasicStats[termStats.length]; BasicStats stats[] = new BasicStats[termStats.length];
for (int i = 0; i < termStats.length; i++) { for (int i = 0; i < termStats.length; i++) {
stats[i] = newStats(queryBoost); stats[i] = newStats(collectionStats.field(), queryBoost);
fillBasicStats(stats[i], collectionStats, termStats[i]); fillBasicStats(stats[i], collectionStats, termStats[i]);
} }
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats); return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
} }
/** Factory method to return a custom stats object */ /** Factory method to return a custom stats object */
protected BasicStats newStats(float queryBoost) { protected BasicStats newStats(String field, float queryBoost) {
return new BasicStats(queryBoost); return new BasicStats(field, queryBoost);
} }
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. /** Fills all member fields defined in {@code BasicStats} in {@code stats}.
@ -179,40 +179,38 @@ public abstract class SimilarityBase extends Similarity {
} }
@Override @Override
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
AtomicReaderContext context) throws IOException {
DocValues norms = context.reader().normValues(fieldName);
if (stats instanceof MultiSimilarity.MultiStats) { if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation, // a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query // scoring almost as if it were boolean query
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
ExactDocScorer subScorers[] = new ExactDocScorer[subStats.length]; ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) { for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = new BasicExactDocScorer((BasicStats)subStats[i], norms); BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
} }
return new MultiSimilarity.MultiExactDocScorer(subScorers); return new MultiSimilarity.MultiExactDocScorer(subScorers);
} else { } else {
return new BasicExactDocScorer((BasicStats) stats, norms); BasicStats basicstats = (BasicStats) stats;
return new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
} }
} }
@Override @Override
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
AtomicReaderContext context) throws IOException {
DocValues norms = context.reader().normValues(fieldName);
if (stats instanceof MultiSimilarity.MultiStats) { if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation, // a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query // scoring almost as if it were boolean query
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
SloppyDocScorer subScorers[] = new SloppyDocScorer[subStats.length]; SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) { for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = new BasicSloppyDocScorer((BasicStats)subStats[i], norms); BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
} }
return new MultiSimilarity.MultiSloppyDocScorer(subScorers); return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
} else { } else {
return new BasicSloppyDocScorer((BasicStats) stats, norms); BasicStats basicstats = (BasicStats) stats;
return new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
} }
} }
@ -274,7 +272,7 @@ public abstract class SimilarityBase extends Similarity {
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)}, * {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
* respectively. * respectively.
*/ */
private class BasicExactDocScorer extends ExactDocScorer { private class BasicExactDocScorer extends ExactSimScorer {
private final BasicStats stats; private final BasicStats stats;
private final byte[] norms; private final byte[] norms;
@ -303,7 +301,7 @@ public abstract class SimilarityBase extends Similarity {
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)}, * {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
* respectively. * respectively.
*/ */
private class BasicSloppyDocScorer extends SloppyDocScorer { private class BasicSloppyDocScorer extends SloppySimScorer {
private final BasicStats stats; private final BasicStats stats;
private final byte[] norms; private final byte[] norms;

View File

@ -694,26 +694,28 @@ public abstract class TFIDFSimilarity extends Similarity {
public abstract float scorePayload(int doc, int start, int end, BytesRef payload); public abstract float scorePayload(int doc, int start, int end, BytesRef payload);
@Override @Override
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
final Explanation idf = termStats.length == 1 final Explanation idf = termStats.length == 1
? idfExplain(collectionStats, termStats[0]) ? idfExplain(collectionStats, termStats[0])
: idfExplain(collectionStats, termStats); : idfExplain(collectionStats, termStats);
return new IDFStats(idf, queryBoost); return new IDFStats(collectionStats.field(), idf, queryBoost);
} }
@Override @Override
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
return new ExactTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName)); IDFStats idfstats = (IDFStats) stats;
return new ExactTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
} }
@Override @Override
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName)); IDFStats idfstats = (IDFStats) stats;
return new SloppyTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
} }
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot. // TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
private final class ExactTFIDFDocScorer extends ExactDocScorer { private final class ExactTFIDFDocScorer extends ExactSimScorer {
private final IDFStats stats; private final IDFStats stats;
private final float weightValue; private final float weightValue;
private final byte[] norms; private final byte[] norms;
@ -744,7 +746,7 @@ public abstract class TFIDFSimilarity extends Similarity {
} }
} }
private final class SloppyTFIDFDocScorer extends SloppyDocScorer { private final class SloppyTFIDFDocScorer extends SloppySimScorer {
private final IDFStats stats; private final IDFStats stats;
private final float weightValue; private final float weightValue;
private final byte[] norms; private final byte[] norms;
@ -780,7 +782,8 @@ public abstract class TFIDFSimilarity extends Similarity {
/** Collection statistics for the TF-IDF model. The only statistic of interest /** Collection statistics for the TF-IDF model. The only statistic of interest
* to this model is idf. */ * to this model is idf. */
private static class IDFStats extends Stats { private static class IDFStats extends SimWeight {
private final String field;
/** The idf and its explanation */ /** The idf and its explanation */
private final Explanation idf; private final Explanation idf;
private float queryNorm; private float queryNorm;
@ -788,8 +791,9 @@ public abstract class TFIDFSimilarity extends Similarity {
private final float queryBoost; private final float queryBoost;
private float value; private float value;
public IDFStats(Explanation idf, float queryBoost) { public IDFStats(String field, Explanation idf, float queryBoost) {
// TODO: Validate? // TODO: Validate?
this.field = field;
this.idf = idf; this.idf = idf;
this.queryBoost = queryBoost; this.queryBoost = queryBoost;
this.queryWeight = idf.getValue() * queryBoost; // compute query weight this.queryWeight = idf.getValue() * queryBoost; // compute query weight

View File

@ -33,9 +33,9 @@ public class SpanScorer extends Scorer {
protected int doc; protected int doc;
protected float freq; protected float freq;
protected final Similarity.SloppyDocScorer docScorer; protected final Similarity.SloppySimScorer docScorer;
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppyDocScorer docScorer) protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
throws IOException { throws IOException {
super(weight); super(weight);
this.docScorer = docScorer; this.docScorer = docScorer;

View File

@ -22,7 +22,7 @@ import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext; import org.apache.lucene.util.TermContext;
@ -38,7 +38,7 @@ public class SpanWeight extends Weight {
protected Similarity similarity; protected Similarity similarity;
protected Map<Term,TermContext> termContexts; protected Map<Term,TermContext> termContexts;
protected SpanQuery query; protected SpanQuery query;
protected Similarity.Stats stats; protected Similarity.SimWeight stats;
public SpanWeight(SpanQuery query, IndexSearcher searcher) public SpanWeight(SpanQuery query, IndexSearcher searcher)
throws IOException { throws IOException {
@ -57,9 +57,8 @@ public class SpanWeight extends Weight {
termContexts.put(term, state); termContexts.put(term, state);
i++; i++;
} }
stats = similarity.computeStats( stats = similarity.computeWeight(query.getBoost(),
searcher.collectionStatistics(query.getField()), searcher.collectionStatistics(query.getField()),
query.getBoost(),
termStats); termStats);
} }
@ -79,7 +78,7 @@ public class SpanWeight extends Weight {
@Override @Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context)); return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
} }
@Override @Override
@ -89,7 +88,7 @@ public class SpanWeight extends Weight {
int newDoc = scorer.advance(doc); int newDoc = scorer.advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = scorer.freq(); float freq = scorer.freq();
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation(); ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));

View File

@ -191,7 +191,7 @@ final class JustCompileSearch {
static final class JustCompilePhraseScorer extends PhraseScorer { static final class JustCompilePhraseScorer extends PhraseScorer {
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SloppyDocScorer docScorer) throws IOException { Similarity.SloppySimScorer docScorer) throws IOException {
super(weight, postings, docScorer); super(weight, postings, docScorer);
} }
@ -247,17 +247,17 @@ final class JustCompileSearch {
static final class JustCompileSimilarity extends Similarity { static final class JustCompileSimilarity extends Similarity {
@Override @Override
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override @Override
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }
@Override @Override
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }

View File

@ -158,16 +158,16 @@ public class TestDocValuesScoring extends LuceneTestCase {
} }
@Override @Override
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return sim.computeStats(collectionStats, queryBoost, termStats); return sim.computeWeight(queryBoost, collectionStats, termStats);
} }
@Override @Override
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final ExactDocScorer sub = sim.exactDocScorer(stats, fieldName, context); final ExactSimScorer sub = sim.exactSimScorer(stats, context);
final Source values = context.reader().docValues(boostField).getSource(); final Source values = context.reader().docValues(boostField).getSource();
return new ExactDocScorer() { return new ExactSimScorer() {
@Override @Override
public float score(int doc, int freq) { public float score(int doc, int freq) {
return (float) values.getFloat(doc) * sub.score(doc, freq); return (float) values.getFloat(doc) * sub.score(doc, freq);
@ -186,11 +186,11 @@ public class TestDocValuesScoring extends LuceneTestCase {
} }
@Override @Override
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final SloppyDocScorer sub = sim.sloppyDocScorer(stats, fieldName, context); final SloppySimScorer sub = sim.sloppySimScorer(stats, context);
final Source values = context.reader().docValues(boostField).getSource(); final Source values = context.reader().docValues(boostField).getSource();
return new SloppyDocScorer() { return new SloppySimScorer() {
@Override @Override
public float score(int doc, float freq) { public float score(int doc, float freq) {
return (float) values.getFloat(doc) * sub.score(doc, freq); return (float) values.getFloat(doc) * sub.score(doc, freq);

View File

@ -23,11 +23,9 @@ import java.util.List;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
@ -40,8 +38,6 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TermContext;
import org.junit.Ignore;
/** /**
* Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and * Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and
@ -167,7 +163,7 @@ public class TestSimilarityBase extends LuceneTestCase {
/** Creates the default statistics object that the specific tests modify. */ /** Creates the default statistics object that the specific tests modify. */
private BasicStats createStats() { private BasicStats createStats() {
BasicStats stats = new BasicStats(1); BasicStats stats = new BasicStats("spoof", 1);
stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS); stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS);
stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS); stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS);
stats.setAvgFieldLength(AVG_FIELD_LENGTH); stats.setAvgFieldLength(AVG_FIELD_LENGTH);
@ -177,7 +173,7 @@ public class TestSimilarityBase extends LuceneTestCase {
} }
private CollectionStatistics toCollectionStats(BasicStats stats) { private CollectionStatistics toCollectionStats(BasicStats stats) {
return new CollectionStatistics("spoof", stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1); return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
} }
private TermStatistics toTermStats(BasicStats stats) { private TermStatistics toTermStats(BasicStats stats) {
@ -192,8 +188,8 @@ public class TestSimilarityBase extends LuceneTestCase {
private void unitTestCore(BasicStats stats, float freq, int docLen) private void unitTestCore(BasicStats stats, float freq, int docLen)
throws IOException { throws IOException {
for (SimilarityBase sim : sims) { for (SimilarityBase sim : sims) {
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats), BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
stats.getTotalBoost(), toCollectionStats(stats),
toTermStats(stats)); toTermStats(stats));
float score = sim.score(realStats, freq, docLen); float score = sim.score(realStats, freq, docLen);
float explScore = sim.explain( float explScore = sim.explain(
@ -525,8 +521,8 @@ public class TestSimilarityBase extends LuceneTestCase {
private void correctnessTestCore(SimilarityBase sim, float gold) private void correctnessTestCore(SimilarityBase sim, float gold)
throws IOException { throws IOException {
BasicStats stats = createStats(); BasicStats stats = createStats();
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats), BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
stats.getTotalBoost(), toCollectionStats(stats),
toTermStats(stats)); toTermStats(stats));
float score = sim.score(realStats, FREQ, DOC_LEN); float score = sim.score(realStats, FREQ, DOC_LEN);
assertEquals( assertEquals(

View File

@ -139,7 +139,7 @@ final class JustCompileSearchSpans {
static final class JustCompileSpanScorer extends SpanScorer { static final class JustCompileSpanScorer extends SpanScorer {
protected JustCompileSpanScorer(Spans spans, Weight weight, protected JustCompileSpanScorer(Spans spans, Weight weight,
Similarity.SloppyDocScorer docScorer) throws IOException { Similarity.SloppySimScorer docScorer) throws IOException {
super(spans, weight, docScorer); super(spans, weight, docScorer);
} }