mirror of https://github.com/apache/lucene.git
LUCENE-3749: Similarity.java javadocs and simplifications for 4.0
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1239941 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d80355fd21
commit
8fbd9d7673
|
@ -28,7 +28,7 @@ import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
|
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
|
||||||
import org.apache.lucene.search.TermQuery.TermWeight;
|
import org.apache.lucene.search.TermQuery.TermWeight;
|
||||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
@ -362,7 +362,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
if (termsEnum == null) {
|
if (termsEnum == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
final ExactSimScorer docScorer = weight.createDocScorer(context);
|
||||||
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
|
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
|
||||||
if (docsAndFreqsEnum == null) {
|
if (docsAndFreqsEnum == null) {
|
||||||
// TODO: we could carry over TermState from the
|
// TODO: we could carry over TermState from the
|
||||||
|
@ -394,7 +394,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
if (termsEnum == null) {
|
if (termsEnum == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
final ExactSimScorer docScorer = weight.createDocScorer(context);
|
||||||
docsAndFreqs[i] = new DocsAndFreqs(null,
|
docsAndFreqs[i] = new DocsAndFreqs(null,
|
||||||
termsEnum.docs(acceptDocs, null, false),
|
termsEnum.docs(acceptDocs, null, false),
|
||||||
termsEnum.docFreq(), docScorer);
|
termsEnum.docFreq(), docScorer);
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
|
||||||
/** Scorer for conjunctions, sets of terms, all of which are required. */
|
/** Scorer for conjunctions, sets of terms, all of which are required. */
|
||||||
|
@ -100,10 +100,10 @@ class ConjunctionTermScorer extends Scorer {
|
||||||
final DocsEnum docsAndFreqs;
|
final DocsEnum docsAndFreqs;
|
||||||
final DocsEnum docs;
|
final DocsEnum docs;
|
||||||
final int docFreq;
|
final int docFreq;
|
||||||
final ExactDocScorer docScorer;
|
final ExactSimScorer docScorer;
|
||||||
int doc = -1;
|
int doc = -1;
|
||||||
|
|
||||||
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) {
|
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactSimScorer docScorer) {
|
||||||
this.docsAndFreqs = docsAndFreqs;
|
this.docsAndFreqs = docsAndFreqs;
|
||||||
this.docs = docs;
|
this.docs = docs;
|
||||||
this.docFreq = docFreq;
|
this.docFreq = docFreq;
|
||||||
|
|
|
@ -55,10 +55,10 @@ final class ExactPhraseScorer extends Scorer {
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private int freq;
|
private int freq;
|
||||||
|
|
||||||
private final Similarity.ExactDocScorer docScorer;
|
private final Similarity.ExactSimScorer docScorer;
|
||||||
|
|
||||||
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
Similarity.ExactDocScorer docScorer) throws IOException {
|
Similarity.ExactSimScorer docScorer) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
|
||||||
final class MatchOnlyTermScorer extends Scorer {
|
final class MatchOnlyTermScorer extends Scorer {
|
||||||
private final DocsEnum docsEnum;
|
private final DocsEnum docsEnum;
|
||||||
private final Similarity.ExactDocScorer docScorer;
|
private final Similarity.ExactSimScorer docScorer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a <code>MatchOnlyTermScorer</code>.
|
* Construct a <code>MatchOnlyTermScorer</code>.
|
||||||
|
@ -39,10 +39,10 @@ final class MatchOnlyTermScorer extends Scorer {
|
||||||
* @param td
|
* @param td
|
||||||
* An iterator over the documents matching the <code>Term</code>.
|
* An iterator over the documents matching the <code>Term</code>.
|
||||||
* @param docScorer
|
* @param docScorer
|
||||||
* The <code>Similarity.ExactDocScorer</code> implementation
|
* The <code>Similarity.ExactSimScorer</code> implementation
|
||||||
* to be used for score computations.
|
* to be used for score computations.
|
||||||
*/
|
*/
|
||||||
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
|
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
this.docsEnum = td;
|
this.docsEnum = td;
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
@ -137,7 +137,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
|
|
||||||
private class MultiPhraseWeight extends Weight {
|
private class MultiPhraseWeight extends Weight {
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.Stats stats;
|
private final Similarity.SimWeight stats;
|
||||||
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||||
|
|
||||||
public MultiPhraseWeight(IndexSearcher searcher)
|
public MultiPhraseWeight(IndexSearcher searcher)
|
||||||
|
@ -157,8 +157,9 @@ public class MultiPhraseQuery extends Query {
|
||||||
allTermStats.add(searcher.termStatistics(term, termContext));
|
allTermStats.add(searcher.termStatistics(term, termContext));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats = similarity.computeStats(searcher.collectionStatistics(field),
|
stats = similarity.computeWeight(getBoost(),
|
||||||
getBoost(), allTermStats.toArray(new TermStatistics[allTermStats.size()]));
|
searcher.collectionStatistics(field),
|
||||||
|
allTermStats.toArray(new TermStatistics[allTermStats.size()]));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -246,14 +247,14 @@ public class MultiPhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) {
|
if (slop == 0) {
|
||||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context));
|
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
|
||||||
if (s.noDocs) {
|
if (s.noDocs) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context));
|
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,7 +265,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context);
|
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||||
ComplexExplanation result = new ComplexExplanation();
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
@ -183,7 +183,7 @@ public class PhraseQuery extends Query {
|
||||||
|
|
||||||
private class PhraseWeight extends Weight {
|
private class PhraseWeight extends Weight {
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.Stats stats;
|
private final Similarity.SimWeight stats;
|
||||||
private transient TermContext states[];
|
private transient TermContext states[];
|
||||||
|
|
||||||
public PhraseWeight(IndexSearcher searcher)
|
public PhraseWeight(IndexSearcher searcher)
|
||||||
|
@ -197,7 +197,7 @@ public class PhraseQuery extends Query {
|
||||||
states[i] = TermContext.build(context, term, true);
|
states[i] = TermContext.build(context, term, true);
|
||||||
termStats[i] = searcher.termStatistics(term, states[i]);
|
termStats[i] = searcher.termStatistics(term, states[i]);
|
||||||
}
|
}
|
||||||
stats = similarity.computeStats(searcher.collectionStatistics(field), getBoost(), termStats);
|
stats = similarity.computeWeight(getBoost(), searcher.collectionStatistics(field), termStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -258,7 +258,7 @@ public class PhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) { // optimize exact case
|
if (slop == 0) { // optimize exact case
|
||||||
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context));
|
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
|
||||||
if (s.noDocs) {
|
if (s.noDocs) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
|
@ -266,7 +266,7 @@ public class PhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return
|
return
|
||||||
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context));
|
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,7 +282,7 @@ public class PhraseQuery extends Query {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context);
|
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||||
ComplexExplanation result = new ComplexExplanation();
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
|
|
@ -36,10 +36,10 @@ abstract class PhraseScorer extends Scorer {
|
||||||
|
|
||||||
private float freq; //phrase frequency in current doc as computed by phraseFreq().
|
private float freq; //phrase frequency in current doc as computed by phraseFreq().
|
||||||
|
|
||||||
final Similarity.SloppyDocScorer docScorer;
|
final Similarity.SloppySimScorer docScorer;
|
||||||
|
|
||||||
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
Similarity.SloppyDocScorer docScorer) {
|
Similarity.SloppySimScorer docScorer) {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
||||||
private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
|
private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
|
||||||
|
|
||||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
int slop, Similarity.SloppyDocScorer docScorer) {
|
int slop, Similarity.SloppySimScorer docScorer) {
|
||||||
super(weight, postings, docScorer);
|
super(weight, postings, docScorer);
|
||||||
this.slop = slop;
|
this.slop = slop;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -45,7 +45,7 @@ public class TermQuery extends Query {
|
||||||
|
|
||||||
final class TermWeight extends Weight {
|
final class TermWeight extends Weight {
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.Stats stats;
|
private final Similarity.SimWeight stats;
|
||||||
private final TermContext termStates;
|
private final TermContext termStates;
|
||||||
|
|
||||||
public TermWeight(IndexSearcher searcher, TermContext termStates)
|
public TermWeight(IndexSearcher searcher, TermContext termStates)
|
||||||
|
@ -53,9 +53,9 @@ public class TermQuery extends Query {
|
||||||
assert termStates != null : "TermContext must not be null";
|
assert termStates != null : "TermContext must not be null";
|
||||||
this.termStates = termStates;
|
this.termStates = termStates;
|
||||||
this.similarity = searcher.getSimilarityProvider().get(term.field());
|
this.similarity = searcher.getSimilarityProvider().get(term.field());
|
||||||
this.stats = similarity.computeStats(
|
this.stats = similarity.computeWeight(
|
||||||
searcher.collectionStatistics(term.field()),
|
|
||||||
getBoost(),
|
getBoost(),
|
||||||
|
searcher.collectionStatistics(term.field()),
|
||||||
searcher.termStatistics(term, termStates));
|
searcher.termStatistics(term, termStates));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,10 +95,10 @@ public class TermQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an {@link ExactDocScorer} for this {@link TermWeight}*/
|
* Creates an {@link ExactSimScorer} for this {@link TermWeight}*/
|
||||||
ExactDocScorer createDocScorer(AtomicReaderContext context)
|
ExactSimScorer createDocScorer(AtomicReaderContext context)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return similarity.exactDocScorer(stats, term.field(), context);
|
return similarity.exactSimScorer(stats, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -130,7 +130,7 @@ public class TermQuery extends Query {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context);
|
ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
|
||||||
ComplexExplanation result = new ComplexExplanation();
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
|
||||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.search.similarities.Similarity;
|
||||||
*/
|
*/
|
||||||
final class TermScorer extends Scorer {
|
final class TermScorer extends Scorer {
|
||||||
private final DocsEnum docsEnum;
|
private final DocsEnum docsEnum;
|
||||||
private final Similarity.ExactDocScorer docScorer;
|
private final Similarity.ExactSimScorer docScorer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a <code>TermScorer</code>.
|
* Construct a <code>TermScorer</code>.
|
||||||
|
@ -36,10 +36,10 @@ final class TermScorer extends Scorer {
|
||||||
* @param td
|
* @param td
|
||||||
* An iterator over the documents matching the <code>Term</code>.
|
* An iterator over the documents matching the <code>Term</code>.
|
||||||
* @param docScorer
|
* @param docScorer
|
||||||
* The <code>Similarity.ExactDocScorer</code> implementation
|
* The <code>Similarity.ExactSimScorer</code> implementation
|
||||||
* to be used for score computations.
|
* to be used for score computations.
|
||||||
*/
|
*/
|
||||||
TermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
|
TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
this.docsEnum = td;
|
this.docsEnum = td;
|
||||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||||
import org.apache.lucene.search.spans.NearSpansOrdered;
|
import org.apache.lucene.search.spans.NearSpansOrdered;
|
||||||
import org.apache.lucene.search.spans.NearSpansUnordered;
|
import org.apache.lucene.search.spans.NearSpansUnordered;
|
||||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
@ -53,7 +53,7 @@ import java.util.Iterator;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
||||||
*
|
*
|
||||||
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
|
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||||
*/
|
*/
|
||||||
public class PayloadNearQuery extends SpanNearQuery {
|
public class PayloadNearQuery extends SpanNearQuery {
|
||||||
protected String fieldName;
|
protected String fieldName;
|
||||||
|
@ -151,7 +151,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
|
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
|
||||||
similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
|
similarity, similarity.sloppySimScorer(stats, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -161,7 +161,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||||
Explanation expl = new Explanation();
|
Explanation expl = new Explanation();
|
||||||
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
@ -189,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
private int payloadsSeen;
|
private int payloadsSeen;
|
||||||
|
|
||||||
protected PayloadNearSpanScorer(Spans spans, Weight weight,
|
protected PayloadNearSpanScorer(Spans spans, Weight weight,
|
||||||
Similarity similarity, Similarity.SloppyDocScorer docScorer) throws IOException {
|
Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException {
|
||||||
super(spans, weight, docScorer);
|
super(spans, weight, docScorer);
|
||||||
this.spans = spans;
|
this.spans = spans;
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.search.ComplexExplanation;
|
||||||
import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer;
|
import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||||
import org.apache.lucene.search.spans.TermSpans;
|
import org.apache.lucene.search.spans.TermSpans;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.SpanWeight;
|
import org.apache.lucene.search.spans.SpanWeight;
|
||||||
|
@ -49,7 +49,7 @@ import java.io.IOException;
|
||||||
* which returns 1 by default.
|
* which returns 1 by default.
|
||||||
* <p/>
|
* <p/>
|
||||||
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
|
||||||
* @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef)
|
* @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
|
||||||
**/
|
**/
|
||||||
public class PayloadTermQuery extends SpanTermQuery {
|
public class PayloadTermQuery extends SpanTermQuery {
|
||||||
protected PayloadFunction function;
|
protected PayloadFunction function;
|
||||||
|
@ -82,7 +82,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
|
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
|
||||||
this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
this, similarity.sloppySimScorer(stats, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected class PayloadTermSpanScorer extends SpanScorer {
|
protected class PayloadTermSpanScorer extends SpanScorer {
|
||||||
|
@ -91,7 +91,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
protected int payloadsSeen;
|
protected int payloadsSeen;
|
||||||
private final TermSpans termSpans;
|
private final TermSpans termSpans;
|
||||||
|
|
||||||
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppyDocScorer docScorer) throws IOException {
|
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
|
||||||
super(spans, weight, docScorer);
|
super(spans, weight, docScorer);
|
||||||
termSpans = spans;
|
termSpans = spans;
|
||||||
}
|
}
|
||||||
|
@ -180,7 +180,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||||
Explanation expl = new Explanation();
|
Explanation expl = new Explanation();
|
||||||
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
|
|
@ -153,7 +153,7 @@ public class BM25Similarity extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||||
|
|
||||||
float avgdl = avgFieldLength(collectionStats);
|
float avgdl = avgFieldLength(collectionStats);
|
||||||
|
@ -163,23 +163,25 @@ public class BM25Similarity extends Similarity {
|
||||||
for (int i = 0; i < cache.length; i++) {
|
for (int i = 0; i < cache.length; i++) {
|
||||||
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
|
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
|
||||||
}
|
}
|
||||||
return new BM25Stats(idf, queryBoost, avgdl, cache);
|
return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
final DocValues norms = context.reader().normValues(fieldName);
|
BM25Stats bm25stats = (BM25Stats) stats;
|
||||||
|
final DocValues norms = context.reader().normValues(bm25stats.field);
|
||||||
return norms == null
|
return norms == null
|
||||||
? new ExactBM25DocScorerNoNorms((BM25Stats)stats)
|
? new ExactBM25DocScorerNoNorms(bm25stats)
|
||||||
: new ExactBM25DocScorer((BM25Stats)stats, norms);
|
: new ExactBM25DocScorer(bm25stats, norms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
return new SloppyBM25DocScorer((BM25Stats) stats, context.reader().normValues(fieldName));
|
BM25Stats bm25stats = (BM25Stats) stats;
|
||||||
|
return new SloppyBM25DocScorer(bm25stats, context.reader().normValues(bm25stats.field));
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ExactBM25DocScorer extends ExactDocScorer {
|
private class ExactBM25DocScorer extends ExactSimScorer {
|
||||||
private final BM25Stats stats;
|
private final BM25Stats stats;
|
||||||
private final float weightValue;
|
private final float weightValue;
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
@ -205,7 +207,7 @@ public class BM25Similarity extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** there are no norms, we act as if b=0 */
|
/** there are no norms, we act as if b=0 */
|
||||||
private class ExactBM25DocScorerNoNorms extends ExactDocScorer {
|
private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
|
||||||
private final BM25Stats stats;
|
private final BM25Stats stats;
|
||||||
private final float weightValue;
|
private final float weightValue;
|
||||||
private static final int SCORE_CACHE_SIZE = 32;
|
private static final int SCORE_CACHE_SIZE = 32;
|
||||||
|
@ -232,7 +234,7 @@ public class BM25Similarity extends Similarity {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SloppyBM25DocScorer extends SloppyDocScorer {
|
private class SloppyBM25DocScorer extends SloppySimScorer {
|
||||||
private final BM25Stats stats;
|
private final BM25Stats stats;
|
||||||
private final float weightValue; // boost * idf * (k1 + 1)
|
private final float weightValue; // boost * idf * (k1 + 1)
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
@ -269,7 +271,7 @@ public class BM25Similarity extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Collection statistics for the BM25 model. */
|
/** Collection statistics for the BM25 model. */
|
||||||
private static class BM25Stats extends Stats {
|
private static class BM25Stats extends SimWeight {
|
||||||
/** BM25's idf */
|
/** BM25's idf */
|
||||||
private final Explanation idf;
|
private final Explanation idf;
|
||||||
/** The average document length. */
|
/** The average document length. */
|
||||||
|
@ -280,10 +282,13 @@ public class BM25Similarity extends Similarity {
|
||||||
private float topLevelBoost;
|
private float topLevelBoost;
|
||||||
/** weight (idf * boost) */
|
/** weight (idf * boost) */
|
||||||
private float weight;
|
private float weight;
|
||||||
|
/** field name, for pulling norms */
|
||||||
|
private final String field;
|
||||||
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
/** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
|
||||||
private final float cache[];
|
private final float cache[];
|
||||||
|
|
||||||
BM25Stats(Explanation idf, float queryBoost, float avgdl, float cache[]) {
|
BM25Stats(String field, Explanation idf, float queryBoost, float avgdl, float cache[]) {
|
||||||
|
this.field = field;
|
||||||
this.idf = idf;
|
this.idf = idf;
|
||||||
this.queryBoost = queryBoost;
|
this.queryBoost = queryBoost;
|
||||||
this.avgdl = avgdl;
|
this.avgdl = avgdl;
|
||||||
|
|
|
@ -23,7 +23,8 @@ import org.apache.lucene.index.Terms;
|
||||||
* Stores all statistics commonly used by ranking methods.
|
* Stores all statistics commonly used by ranking methods.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class BasicStats extends Similarity.Stats {
|
public class BasicStats extends Similarity.SimWeight {
|
||||||
|
final String field;
|
||||||
/** The number of documents. */
|
/** The number of documents. */
|
||||||
protected long numberOfDocuments;
|
protected long numberOfDocuments;
|
||||||
/** The total number of tokens in the field. */
|
/** The total number of tokens in the field. */
|
||||||
|
@ -47,7 +48,8 @@ public class BasicStats extends Similarity.Stats {
|
||||||
protected float totalBoost;
|
protected float totalBoost;
|
||||||
|
|
||||||
/** Constructor. Sets the query boost. */
|
/** Constructor. Sets the query boost. */
|
||||||
public BasicStats(float queryBoost) {
|
public BasicStats(String field, float queryBoost) {
|
||||||
|
this.field = field;
|
||||||
this.queryBoost = queryBoost;
|
this.queryBoost = queryBoost;
|
||||||
this.totalBoost = queryBoost;
|
this.totalBoost = queryBoost;
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,8 +51,8 @@ public abstract class LMSimilarity extends SimilarityBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected BasicStats newStats(float queryBoost) {
|
protected BasicStats newStats(String field, float queryBoost) {
|
||||||
return new LMStats(queryBoost);
|
return new LMStats(field, queryBoost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -102,8 +102,8 @@ public abstract class LMSimilarity extends SimilarityBase {
|
||||||
/** The probability that the current term is generated by the collection. */
|
/** The probability that the current term is generated by the collection. */
|
||||||
private float collectionProbability;
|
private float collectionProbability;
|
||||||
|
|
||||||
public LMStats(float queryBoost) {
|
public LMStats(String field, float queryBoost) {
|
||||||
super(queryBoost);
|
super(field, queryBoost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -46,43 +46,43 @@ public class MultiSimilarity extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
Stats subStats[] = new Stats[sims.length];
|
SimWeight subStats[] = new SimWeight[sims.length];
|
||||||
for (int i = 0; i < subStats.length; i++) {
|
for (int i = 0; i < subStats.length; i++) {
|
||||||
subStats[i] = sims[i].computeStats(collectionStats, queryBoost, termStats);
|
subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
|
||||||
}
|
}
|
||||||
return new MultiStats(subStats);
|
return new MultiStats(subStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
ExactDocScorer subScorers[] = new ExactDocScorer[sims.length];
|
ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
|
||||||
for (int i = 0; i < subScorers.length; i++) {
|
for (int i = 0; i < subScorers.length; i++) {
|
||||||
subScorers[i] = sims[i].exactDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
|
subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
|
||||||
}
|
}
|
||||||
return new MultiExactDocScorer(subScorers);
|
return new MultiExactDocScorer(subScorers);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
SloppyDocScorer subScorers[] = new SloppyDocScorer[sims.length];
|
SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
|
||||||
for (int i = 0; i < subScorers.length; i++) {
|
for (int i = 0; i < subScorers.length; i++) {
|
||||||
subScorers[i] = sims[i].sloppyDocScorer(((MultiStats)stats).subStats[i], fieldName, context);
|
subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
|
||||||
}
|
}
|
||||||
return new MultiSloppyDocScorer(subScorers);
|
return new MultiSloppyDocScorer(subScorers);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MultiExactDocScorer extends ExactDocScorer {
|
public static class MultiExactDocScorer extends ExactSimScorer {
|
||||||
private final ExactDocScorer subScorers[];
|
private final ExactSimScorer subScorers[];
|
||||||
|
|
||||||
MultiExactDocScorer(ExactDocScorer subScorers[]) {
|
MultiExactDocScorer(ExactSimScorer subScorers[]) {
|
||||||
this.subScorers = subScorers;
|
this.subScorers = subScorers;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score(int doc, int freq) {
|
public float score(int doc, int freq) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (ExactDocScorer subScorer : subScorers) {
|
for (ExactSimScorer subScorer : subScorers) {
|
||||||
sum += subScorer.score(doc, freq);
|
sum += subScorer.score(doc, freq);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -91,24 +91,24 @@ public class MultiSimilarity extends Similarity {
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(int doc, Explanation freq) {
|
public Explanation explain(int doc, Explanation freq) {
|
||||||
Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
|
Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
|
||||||
for (ExactDocScorer subScorer : subScorers) {
|
for (ExactSimScorer subScorer : subScorers) {
|
||||||
expl.addDetail(subScorer.explain(doc, freq));
|
expl.addDetail(subScorer.explain(doc, freq));
|
||||||
}
|
}
|
||||||
return expl;
|
return expl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MultiSloppyDocScorer extends SloppyDocScorer {
|
public static class MultiSloppyDocScorer extends SloppySimScorer {
|
||||||
private final SloppyDocScorer subScorers[];
|
private final SloppySimScorer subScorers[];
|
||||||
|
|
||||||
MultiSloppyDocScorer(SloppyDocScorer subScorers[]) {
|
MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
|
||||||
this.subScorers = subScorers;
|
this.subScorers = subScorers;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score(int doc, float freq) {
|
public float score(int doc, float freq) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (SloppyDocScorer subScorer : subScorers) {
|
for (SloppySimScorer subScorer : subScorers) {
|
||||||
sum += subScorer.score(doc, freq);
|
sum += subScorer.score(doc, freq);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -117,7 +117,7 @@ public class MultiSimilarity extends Similarity {
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(int doc, Explanation freq) {
|
public Explanation explain(int doc, Explanation freq) {
|
||||||
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
|
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
|
||||||
for (SloppyDocScorer subScorer : subScorers) {
|
for (SloppySimScorer subScorer : subScorers) {
|
||||||
expl.addDetail(subScorer.explain(doc, freq));
|
expl.addDetail(subScorer.explain(doc, freq));
|
||||||
}
|
}
|
||||||
return expl;
|
return expl;
|
||||||
|
@ -134,17 +134,17 @@ public class MultiSimilarity extends Similarity {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MultiStats extends Stats {
|
public static class MultiStats extends SimWeight {
|
||||||
final Stats subStats[];
|
final SimWeight subStats[];
|
||||||
|
|
||||||
MultiStats(Stats subStats[]) {
|
MultiStats(SimWeight subStats[]) {
|
||||||
this.subStats = subStats;
|
this.subStats = subStats;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getValueForNormalization() {
|
public float getValueForNormalization() {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (Stats stat : subStats) {
|
for (SimWeight stat : subStats) {
|
||||||
sum += stat.getValueForNormalization();
|
sum += stat.getValueForNormalization();
|
||||||
}
|
}
|
||||||
return sum / subStats.length;
|
return sum / subStats.length;
|
||||||
|
@ -152,7 +152,7 @@ public class MultiSimilarity extends Similarity {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void normalize(float queryNorm, float topLevelBoost) {
|
public void normalize(float queryNorm, float topLevelBoost) {
|
||||||
for (Stats stat : subStats) {
|
for (SimWeight stat : subStats) {
|
||||||
stat.normalize(queryNorm, topLevelBoost);
|
stat.normalize(queryNorm, topLevelBoost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,16 +17,13 @@ package org.apache.lucene.search.similarities;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.document.DocValuesField; // javadoc
|
import org.apache.lucene.document.DocValuesField; // javadoc
|
||||||
import org.apache.lucene.index.AtomicReader; // javadoc
|
import org.apache.lucene.index.AtomicReader; // javadoc
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.FieldInvertState;
|
import org.apache.lucene.index.FieldInvertState;
|
||||||
import org.apache.lucene.index.IndexReader; // javadoc
|
|
||||||
import org.apache.lucene.index.Norm;
|
import org.apache.lucene.index.Norm;
|
||||||
import org.apache.lucene.index.Terms; // javadoc
|
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.CollectionStatistics;
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
import org.apache.lucene.search.Explanation;
|
import org.apache.lucene.search.Explanation;
|
||||||
|
@ -39,7 +36,6 @@ import org.apache.lucene.search.spans.SpanQuery; // javadoc
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.SmallFloat; // javadoc
|
import org.apache.lucene.util.SmallFloat; // javadoc
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Similarity defines the components of Lucene scoring.
|
* Similarity defines the components of Lucene scoring.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -59,21 +55,21 @@ import org.apache.lucene.util.SmallFloat; // javadoc
|
||||||
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing
|
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing
|
||||||
* the Similarity implementation to set a per-document value for the field that will
|
* the Similarity implementation to set a per-document value for the field that will
|
||||||
* be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption
|
* be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption
|
||||||
* about what is in this byte, but it is most useful for encoding length normalization
|
* about what is in this norm, but it is most useful for encoding length normalization
|
||||||
* information.
|
* information.
|
||||||
* <p>
|
* <p>
|
||||||
* Implementations should carefully consider how the normalization byte is encoded: while
|
* Implementations should carefully consider how the normalization is encoded: while
|
||||||
* Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost
|
* Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost
|
||||||
* and length normalization information with {@link SmallFloat}, this might not be suitable
|
* and length normalization information with {@link SmallFloat} into a single byte, this
|
||||||
* for all purposes.
|
* might not be suitable for all purposes.
|
||||||
* <p>
|
* <p>
|
||||||
* Many formulas require the use of average document length, which can be computed via a
|
* Many formulas require the use of average document length, which can be computed via a
|
||||||
* combination of {@link Terms#getSumTotalTermFreq()} and {@link IndexReader#maxDoc()},
|
* combination of {@link CollectionStatistics#sumTotalTermFreq()} and
|
||||||
|
* {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()},
|
||||||
|
* depending upon whether the average should reflect field sparsity.
|
||||||
* <p>
|
* <p>
|
||||||
* Because index-time boost is handled entirely at the application level anyway,
|
* Additional scoring factors can be stored in named {@link DocValuesField}s, and accessed
|
||||||
* an application can alternatively store the index-time boost separately using an
|
* at query-time with {@link AtomicReader#docValues(String)}.
|
||||||
* {@link DocValuesField}, and access this at query-time with
|
|
||||||
* {@link AtomicReader#docValues(String)}.
|
|
||||||
* <p>
|
* <p>
|
||||||
* Finally, using index-time boosts (either via folding into the normalization byte or
|
* Finally, using index-time boosts (either via folding into the normalization byte or
|
||||||
* via DocValues) is an inefficient way to boost the scores of different fields if the
|
* via DocValues) is an inefficient way to boost the scores of different fields if the
|
||||||
|
@ -84,19 +80,19 @@ import org.apache.lucene.util.SmallFloat; // javadoc
|
||||||
* <a name="querytime"/>
|
* <a name="querytime"/>
|
||||||
* At query-time, Queries interact with the Similarity via these steps:
|
* At query-time, Queries interact with the Similarity via these steps:
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>The {@link #computeStats(CollectionStatistics, float, TermStatistics...)} method is called a single time,
|
* <li>The {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} method is called a single time,
|
||||||
* allowing the implementation to compute any statistics (such as IDF, average document length, etc)
|
* allowing the implementation to compute any statistics (such as IDF, average document length, etc)
|
||||||
* across <i>the entire collection</i>. The {@link TermStatistics} passed in already contain
|
* across <i>the entire collection</i>. The {@link TermStatistics} and {@link CollectionStatistics} passed in
|
||||||
* the raw statistics involved, so a Similarity can freely use any combination
|
* already contain all of the raw statistics involved, so a Similarity can freely use any combination
|
||||||
* of term statistics without causing any additional I/O. Lucene makes no assumption about what is
|
* of statistics without causing any additional I/O. Lucene makes no assumption about what is
|
||||||
* stored in the returned {@link Similarity.Stats} object.
|
* stored in the returned {@link Similarity.SimWeight} object.
|
||||||
* <li>The query normalization process occurs a single time: {@link Similarity.Stats#getValueForNormalization()}
|
* <li>The query normalization process occurs a single time: {@link Similarity.SimWeight#getValueForNormalization()}
|
||||||
* is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level
|
* is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level
|
||||||
* query, and finally {@link Similarity.Stats#normalize(float, float)} passes down the normalization value
|
* query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
|
||||||
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
|
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
|
||||||
* <li>For each segment in the index, the Query creates a {@link #exactDocScorer(Stats, String, AtomicReaderContext)}
|
* <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
|
||||||
* (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
|
* (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
|
||||||
* {@link #sloppyDocScorer(Stats, String, AtomicReaderContext)} (for queries with sloppy frequencies such as
|
* {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
|
||||||
* SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
|
* SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
|
||||||
* </ol>
|
* </ol>
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -130,27 +126,40 @@ public abstract class Similarity {
|
||||||
public abstract void computeNorm(FieldInvertState state, Norm norm);
|
public abstract void computeNorm(FieldInvertState state, Norm norm);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query.
|
* Compute any collection-level weight (e.g. IDF, average document length, etc) needed for scoring a query.
|
||||||
|
*
|
||||||
|
* @param queryBoost the query-time boost.
|
||||||
|
* @param collectionStats collection-level statistics, such as the number of tokens in the collection.
|
||||||
|
* @param termStats term-level statistics, such as the document frequency of a term across the collection.
|
||||||
|
* @return SimWeight object with the information this Similarity needs to score a query.
|
||||||
*/
|
*/
|
||||||
public abstract Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats);
|
public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns a new {@link Similarity.ExactDocScorer}.
|
* Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
|
||||||
|
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
|
||||||
|
* @param context segment of the inverted index to be scored.
|
||||||
|
* @return ExactSimScorer for scoring documents across <code>context</code>
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public abstract ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException;
|
public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns a new {@link Similarity.SloppyDocScorer}.
|
* Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
|
||||||
|
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
|
||||||
|
* @param context segment of the inverted index to be scored.
|
||||||
|
* @return SloppySimScorer for scoring documents across <code>context</code>
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public abstract SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException;
|
public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* API for scoring exact queries such as {@link TermQuery} and
|
* API for scoring exact queries such as {@link TermQuery} and
|
||||||
* exact {@link PhraseQuery}.
|
* exact {@link PhraseQuery}.
|
||||||
* <p>
|
* <p>
|
||||||
* Term frequencies are integers (the term or phrase's tf)
|
* Frequencies are integers (the term or phrase frequency within the document)
|
||||||
*/
|
*/
|
||||||
public static abstract class ExactDocScorer {
|
public static abstract class ExactSimScorer {
|
||||||
/**
|
/**
|
||||||
* Score a single document
|
* Score a single document
|
||||||
* @param doc document id
|
* @param doc document id
|
||||||
|
@ -177,12 +186,14 @@ public abstract class Similarity {
|
||||||
* API for scoring "sloppy" queries such as {@link SpanQuery} and
|
* API for scoring "sloppy" queries such as {@link SpanQuery} and
|
||||||
* sloppy {@link PhraseQuery}.
|
* sloppy {@link PhraseQuery}.
|
||||||
* <p>
|
* <p>
|
||||||
* Term frequencies are floating point values.
|
* Frequencies are floating-point values: an approximate
|
||||||
|
* within-document frequency adjusted for "sloppiness" by
|
||||||
|
* {@link SloppySimScorer#computeSlopFactor(int)}.
|
||||||
*/
|
*/
|
||||||
public static abstract class SloppyDocScorer {
|
public static abstract class SloppySimScorer {
|
||||||
/**
|
/**
|
||||||
* Score a single document
|
* Score a single document
|
||||||
* @param doc document id
|
* @param doc document id within the inverted index segment
|
||||||
* @param freq sloppy term frequency
|
* @param freq sloppy term frequency
|
||||||
* @return document's score
|
* @return document's score
|
||||||
*/
|
*/
|
||||||
|
@ -196,7 +207,7 @@ public abstract class Similarity {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Explain the score for a single document
|
* Explain the score for a single document
|
||||||
* @param doc document id
|
* @param doc document id within the inverted index segment
|
||||||
* @param freq Explanation of how the sloppy term frequency was computed
|
* @param freq Explanation of how the sloppy term frequency was computed
|
||||||
* @return explanation of the document's score
|
* @return explanation of the document's score
|
||||||
*/
|
*/
|
||||||
|
@ -208,12 +219,12 @@ public abstract class Similarity {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Stores the statistics for the indexed collection. This abstract
|
/** Stores the weight for a query across the indexed collection. This abstract
|
||||||
* implementation is empty; descendants of {@code Similarity} should
|
* implementation is empty; descendants of {@code Similarity} should
|
||||||
* subclass {@code Stats} and define the statistics they require in the
|
* subclass {@code SimWeight} and define the statistics they require in the
|
||||||
* subclass. Examples include idf, average field length, etc.
|
* subclass. Examples include idf, average field length, etc.
|
||||||
*/
|
*/
|
||||||
public static abstract class Stats {
|
public static abstract class SimWeight {
|
||||||
|
|
||||||
/** The value for normalization of contained query clauses (e.g. sum of squared weights).
|
/** The value for normalization of contained query clauses (e.g. sum of squared weights).
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -70,18 +70,18 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
BasicStats stats[] = new BasicStats[termStats.length];
|
BasicStats stats[] = new BasicStats[termStats.length];
|
||||||
for (int i = 0; i < termStats.length; i++) {
|
for (int i = 0; i < termStats.length; i++) {
|
||||||
stats[i] = newStats(queryBoost);
|
stats[i] = newStats(collectionStats.field(), queryBoost);
|
||||||
fillBasicStats(stats[i], collectionStats, termStats[i]);
|
fillBasicStats(stats[i], collectionStats, termStats[i]);
|
||||||
}
|
}
|
||||||
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
|
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Factory method to return a custom stats object */
|
/** Factory method to return a custom stats object */
|
||||||
protected BasicStats newStats(float queryBoost) {
|
protected BasicStats newStats(String field, float queryBoost) {
|
||||||
return new BasicStats(queryBoost);
|
return new BasicStats(field, queryBoost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||||
|
@ -179,40 +179,38 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
|
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
AtomicReaderContext context) throws IOException {
|
|
||||||
DocValues norms = context.reader().normValues(fieldName);
|
|
||||||
|
|
||||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||||
// a multi term query (e.g. phrase). return the summation,
|
// a multi term query (e.g. phrase). return the summation,
|
||||||
// scoring almost as if it were boolean query
|
// scoring almost as if it were boolean query
|
||||||
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||||
ExactDocScorer subScorers[] = new ExactDocScorer[subStats.length];
|
ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
|
||||||
for (int i = 0; i < subScorers.length; i++) {
|
for (int i = 0; i < subScorers.length; i++) {
|
||||||
subScorers[i] = new BasicExactDocScorer((BasicStats)subStats[i], norms);
|
BasicStats basicstats = (BasicStats) subStats[i];
|
||||||
|
subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||||
}
|
}
|
||||||
return new MultiSimilarity.MultiExactDocScorer(subScorers);
|
return new MultiSimilarity.MultiExactDocScorer(subScorers);
|
||||||
} else {
|
} else {
|
||||||
return new BasicExactDocScorer((BasicStats) stats, norms);
|
BasicStats basicstats = (BasicStats) stats;
|
||||||
|
return new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
|
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
AtomicReaderContext context) throws IOException {
|
|
||||||
DocValues norms = context.reader().normValues(fieldName);
|
|
||||||
|
|
||||||
if (stats instanceof MultiSimilarity.MultiStats) {
|
if (stats instanceof MultiSimilarity.MultiStats) {
|
||||||
// a multi term query (e.g. phrase). return the summation,
|
// a multi term query (e.g. phrase). return the summation,
|
||||||
// scoring almost as if it were boolean query
|
// scoring almost as if it were boolean query
|
||||||
Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
|
||||||
SloppyDocScorer subScorers[] = new SloppyDocScorer[subStats.length];
|
SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
|
||||||
for (int i = 0; i < subScorers.length; i++) {
|
for (int i = 0; i < subScorers.length; i++) {
|
||||||
subScorers[i] = new BasicSloppyDocScorer((BasicStats)subStats[i], norms);
|
BasicStats basicstats = (BasicStats) subStats[i];
|
||||||
|
subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||||
}
|
}
|
||||||
return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
|
return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
|
||||||
} else {
|
} else {
|
||||||
return new BasicSloppyDocScorer((BasicStats) stats, norms);
|
BasicStats basicstats = (BasicStats) stats;
|
||||||
|
return new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,7 +272,7 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
||||||
* respectively.
|
* respectively.
|
||||||
*/
|
*/
|
||||||
private class BasicExactDocScorer extends ExactDocScorer {
|
private class BasicExactDocScorer extends ExactSimScorer {
|
||||||
private final BasicStats stats;
|
private final BasicStats stats;
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
|
||||||
|
@ -303,7 +301,7 @@ public abstract class SimilarityBase extends Similarity {
|
||||||
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
* {@link SimilarityBase#explain(BasicStats, int, Explanation, int)},
|
||||||
* respectively.
|
* respectively.
|
||||||
*/
|
*/
|
||||||
private class BasicSloppyDocScorer extends SloppyDocScorer {
|
private class BasicSloppyDocScorer extends SloppySimScorer {
|
||||||
private final BasicStats stats;
|
private final BasicStats stats;
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
|
||||||
|
|
|
@ -694,26 +694,28 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||||
public abstract float scorePayload(int doc, int start, int end, BytesRef payload);
|
public abstract float scorePayload(int doc, int start, int end, BytesRef payload);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
final Explanation idf = termStats.length == 1
|
final Explanation idf = termStats.length == 1
|
||||||
? idfExplain(collectionStats, termStats[0])
|
? idfExplain(collectionStats, termStats[0])
|
||||||
: idfExplain(collectionStats, termStats);
|
: idfExplain(collectionStats, termStats);
|
||||||
return new IDFStats(idf, queryBoost);
|
return new IDFStats(collectionStats.field(), idf, queryBoost);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
return new ExactTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName));
|
IDFStats idfstats = (IDFStats) stats;
|
||||||
|
return new ExactTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName));
|
IDFStats idfstats = (IDFStats) stats;
|
||||||
|
return new SloppyTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
|
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
|
||||||
|
|
||||||
private final class ExactTFIDFDocScorer extends ExactDocScorer {
|
private final class ExactTFIDFDocScorer extends ExactSimScorer {
|
||||||
private final IDFStats stats;
|
private final IDFStats stats;
|
||||||
private final float weightValue;
|
private final float weightValue;
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
@ -744,7 +746,7 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final class SloppyTFIDFDocScorer extends SloppyDocScorer {
|
private final class SloppyTFIDFDocScorer extends SloppySimScorer {
|
||||||
private final IDFStats stats;
|
private final IDFStats stats;
|
||||||
private final float weightValue;
|
private final float weightValue;
|
||||||
private final byte[] norms;
|
private final byte[] norms;
|
||||||
|
@ -780,7 +782,8 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||||
|
|
||||||
/** Collection statistics for the TF-IDF model. The only statistic of interest
|
/** Collection statistics for the TF-IDF model. The only statistic of interest
|
||||||
* to this model is idf. */
|
* to this model is idf. */
|
||||||
private static class IDFStats extends Stats {
|
private static class IDFStats extends SimWeight {
|
||||||
|
private final String field;
|
||||||
/** The idf and its explanation */
|
/** The idf and its explanation */
|
||||||
private final Explanation idf;
|
private final Explanation idf;
|
||||||
private float queryNorm;
|
private float queryNorm;
|
||||||
|
@ -788,8 +791,9 @@ public abstract class TFIDFSimilarity extends Similarity {
|
||||||
private final float queryBoost;
|
private final float queryBoost;
|
||||||
private float value;
|
private float value;
|
||||||
|
|
||||||
public IDFStats(Explanation idf, float queryBoost) {
|
public IDFStats(String field, Explanation idf, float queryBoost) {
|
||||||
// TODO: Validate?
|
// TODO: Validate?
|
||||||
|
this.field = field;
|
||||||
this.idf = idf;
|
this.idf = idf;
|
||||||
this.queryBoost = queryBoost;
|
this.queryBoost = queryBoost;
|
||||||
this.queryWeight = idf.getValue() * queryBoost; // compute query weight
|
this.queryWeight = idf.getValue() * queryBoost; // compute query weight
|
||||||
|
|
|
@ -33,9 +33,9 @@ public class SpanScorer extends Scorer {
|
||||||
|
|
||||||
protected int doc;
|
protected int doc;
|
||||||
protected float freq;
|
protected float freq;
|
||||||
protected final Similarity.SloppyDocScorer docScorer;
|
protected final Similarity.SloppySimScorer docScorer;
|
||||||
|
|
||||||
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppyDocScorer docScorer)
|
protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.TermContext;
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ public class SpanWeight extends Weight {
|
||||||
protected Similarity similarity;
|
protected Similarity similarity;
|
||||||
protected Map<Term,TermContext> termContexts;
|
protected Map<Term,TermContext> termContexts;
|
||||||
protected SpanQuery query;
|
protected SpanQuery query;
|
||||||
protected Similarity.Stats stats;
|
protected Similarity.SimWeight stats;
|
||||||
|
|
||||||
public SpanWeight(SpanQuery query, IndexSearcher searcher)
|
public SpanWeight(SpanQuery query, IndexSearcher searcher)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -57,9 +57,8 @@ public class SpanWeight extends Weight {
|
||||||
termContexts.put(term, state);
|
termContexts.put(term, state);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
stats = similarity.computeStats(
|
stats = similarity.computeWeight(query.getBoost(),
|
||||||
searcher.collectionStatistics(query.getField()),
|
searcher.collectionStatistics(query.getField()),
|
||||||
query.getBoost(),
|
|
||||||
termStats);
|
termStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +78,7 @@ public class SpanWeight extends Weight {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -89,7 +88,7 @@ public class SpanWeight extends Weight {
|
||||||
int newDoc = scorer.advance(doc);
|
int newDoc = scorer.advance(doc);
|
||||||
if (newDoc == doc) {
|
if (newDoc == doc) {
|
||||||
float freq = scorer.freq();
|
float freq = scorer.freq();
|
||||||
SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context);
|
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||||
ComplexExplanation result = new ComplexExplanation();
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||||
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
|
||||||
|
|
|
@ -191,7 +191,7 @@ final class JustCompileSearch {
|
||||||
static final class JustCompilePhraseScorer extends PhraseScorer {
|
static final class JustCompilePhraseScorer extends PhraseScorer {
|
||||||
|
|
||||||
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
Similarity.SloppyDocScorer docScorer) throws IOException {
|
Similarity.SloppySimScorer docScorer) throws IOException {
|
||||||
super(weight, postings, docScorer);
|
super(weight, postings, docScorer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -247,17 +247,17 @@ final class JustCompileSearch {
|
||||||
static final class JustCompileSimilarity extends Similarity {
|
static final class JustCompileSimilarity extends Similarity {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -158,16 +158,16 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
return sim.computeStats(collectionStats, queryBoost, termStats);
|
return sim.computeWeight(queryBoost, collectionStats, termStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
final ExactDocScorer sub = sim.exactDocScorer(stats, fieldName, context);
|
final ExactSimScorer sub = sim.exactSimScorer(stats, context);
|
||||||
final Source values = context.reader().docValues(boostField).getSource();
|
final Source values = context.reader().docValues(boostField).getSource();
|
||||||
|
|
||||||
return new ExactDocScorer() {
|
return new ExactSimScorer() {
|
||||||
@Override
|
@Override
|
||||||
public float score(int doc, int freq) {
|
public float score(int doc, int freq) {
|
||||||
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
||||||
|
@ -186,11 +186,11 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
|
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
|
||||||
final SloppyDocScorer sub = sim.sloppyDocScorer(stats, fieldName, context);
|
final SloppySimScorer sub = sim.sloppySimScorer(stats, context);
|
||||||
final Source values = context.reader().docValues(boostField).getSource();
|
final Source values = context.reader().docValues(boostField).getSource();
|
||||||
|
|
||||||
return new SloppyDocScorer() {
|
return new SloppySimScorer() {
|
||||||
@Override
|
@Override
|
||||||
public float score(int doc, float freq) {
|
public float score(int doc, float freq) {
|
||||||
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
return (float) values.getFloat(doc) * sub.score(doc, freq);
|
||||||
|
|
|
@ -23,11 +23,9 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.OrdTermState;
|
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.CollectionStatistics;
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
|
@ -40,8 +38,6 @@ import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TermContext;
|
|
||||||
import org.junit.Ignore;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and
|
* Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and
|
||||||
|
@ -167,7 +163,7 @@ public class TestSimilarityBase extends LuceneTestCase {
|
||||||
|
|
||||||
/** Creates the default statistics object that the specific tests modify. */
|
/** Creates the default statistics object that the specific tests modify. */
|
||||||
private BasicStats createStats() {
|
private BasicStats createStats() {
|
||||||
BasicStats stats = new BasicStats(1);
|
BasicStats stats = new BasicStats("spoof", 1);
|
||||||
stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS);
|
stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS);
|
||||||
stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS);
|
stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS);
|
||||||
stats.setAvgFieldLength(AVG_FIELD_LENGTH);
|
stats.setAvgFieldLength(AVG_FIELD_LENGTH);
|
||||||
|
@ -177,7 +173,7 @@ public class TestSimilarityBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
||||||
return new CollectionStatistics("spoof", stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private TermStatistics toTermStats(BasicStats stats) {
|
private TermStatistics toTermStats(BasicStats stats) {
|
||||||
|
@ -192,8 +188,8 @@ public class TestSimilarityBase extends LuceneTestCase {
|
||||||
private void unitTestCore(BasicStats stats, float freq, int docLen)
|
private void unitTestCore(BasicStats stats, float freq, int docLen)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
for (SimilarityBase sim : sims) {
|
for (SimilarityBase sim : sims) {
|
||||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
|
||||||
stats.getTotalBoost(),
|
toCollectionStats(stats),
|
||||||
toTermStats(stats));
|
toTermStats(stats));
|
||||||
float score = sim.score(realStats, freq, docLen);
|
float score = sim.score(realStats, freq, docLen);
|
||||||
float explScore = sim.explain(
|
float explScore = sim.explain(
|
||||||
|
@ -525,8 +521,8 @@ public class TestSimilarityBase extends LuceneTestCase {
|
||||||
private void correctnessTestCore(SimilarityBase sim, float gold)
|
private void correctnessTestCore(SimilarityBase sim, float gold)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
BasicStats stats = createStats();
|
BasicStats stats = createStats();
|
||||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(),
|
||||||
stats.getTotalBoost(),
|
toCollectionStats(stats),
|
||||||
toTermStats(stats));
|
toTermStats(stats));
|
||||||
float score = sim.score(realStats, FREQ, DOC_LEN);
|
float score = sim.score(realStats, FREQ, DOC_LEN);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
|
|
@ -139,7 +139,7 @@ final class JustCompileSearchSpans {
|
||||||
static final class JustCompileSpanScorer extends SpanScorer {
|
static final class JustCompileSpanScorer extends SpanScorer {
|
||||||
|
|
||||||
protected JustCompileSpanScorer(Spans spans, Weight weight,
|
protected JustCompileSpanScorer(Spans spans, Weight weight,
|
||||||
Similarity.SloppyDocScorer docScorer) throws IOException {
|
Similarity.SloppySimScorer docScorer) throws IOException {
|
||||||
super(spans, weight, docScorer);
|
super(spans, weight, docScorer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue