From 8fbd9d767344801e1915168cf7fa4bd404758de7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 3 Feb 2012 00:01:19 +0000 Subject: [PATCH] LUCENE-3749: Similarity.java javadocs and simplifications for 4.0 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1239941 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/search/BooleanQuery.java | 6 +- .../lucene/search/ConjunctionTermScorer.java | 6 +- .../lucene/search/ExactPhraseScorer.java | 4 +- .../lucene/search/MatchOnlyTermScorer.java | 6 +- .../lucene/search/MultiPhraseQuery.java | 15 ++-- .../org/apache/lucene/search/PhraseQuery.java | 12 +-- .../apache/lucene/search/PhraseScorer.java | 4 +- .../lucene/search/SloppyPhraseScorer.java | 2 +- .../org/apache/lucene/search/TermQuery.java | 16 ++-- .../org/apache/lucene/search/TermScorer.java | 6 +- .../search/payloads/PayloadNearQuery.java | 10 +-- .../search/payloads/PayloadTermQuery.java | 10 +-- .../search/similarities/BM25Similarity.java | 31 ++++--- .../search/similarities/BasicStats.java | 6 +- .../search/similarities/LMSimilarity.java | 8 +- .../search/similarities/MultiSimilarity.java | 48 +++++------ .../search/similarities/Similarity.java | 85 +++++++++++-------- .../search/similarities/SimilarityBase.java | 42 +++++---- .../search/similarities/TFIDFSimilarity.java | 24 +++--- .../lucene/search/spans/SpanScorer.java | 4 +- .../lucene/search/spans/SpanWeight.java | 11 ++- .../lucene/search/JustCompileSearch.java | 8 +- .../lucene/search/TestDocValuesScoring.java | 16 ++-- .../similarities/TestSimilarityBase.java | 16 ++-- .../search/spans/JustCompileSearchSpans.java | 2 +- 25 files changed, 207 insertions(+), 191 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java index bfd6b4afc6a..54768b2f69d 100644 --- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java @@ -28,7 +28,7 
@@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs; import org.apache.lucene.search.TermQuery.TermWeight; -import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; +import org.apache.lucene.search.similarities.Similarity.ExactSimScorer; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; @@ -362,7 +362,7 @@ public class BooleanQuery extends Query implements Iterable { if (termsEnum == null) { return null; } - final ExactDocScorer docScorer = weight.createDocScorer(context); + final ExactSimScorer docScorer = weight.createDocScorer(context); final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true); if (docsAndFreqsEnum == null) { // TODO: we could carry over TermState from the @@ -394,7 +394,7 @@ public class BooleanQuery extends Query implements Iterable { if (termsEnum == null) { return null; } - final ExactDocScorer docScorer = weight.createDocScorer(context); + final ExactSimScorer docScorer = weight.createDocScorer(context); docsAndFreqs[i] = new DocsAndFreqs(null, termsEnum.docs(acceptDocs, null, false), termsEnum.docFreq(), docScorer); diff --git a/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java b/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java index e7dad944b76..c10e708cca1 100644 --- a/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; +import org.apache.lucene.search.similarities.Similarity.ExactSimScorer; import org.apache.lucene.util.ArrayUtil; /** Scorer for conjunctions, sets of terms, all of which are 
required. */ @@ -100,10 +100,10 @@ class ConjunctionTermScorer extends Scorer { final DocsEnum docsAndFreqs; final DocsEnum docs; final int docFreq; - final ExactDocScorer docScorer; + final ExactSimScorer docScorer; int doc = -1; - DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) { + DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactSimScorer docScorer) { this.docsAndFreqs = docsAndFreqs; this.docs = docs; this.docFreq = docFreq; diff --git a/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 0a0454b7096..99715949d94 100644 --- a/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -55,10 +55,10 @@ final class ExactPhraseScorer extends Scorer { private int docID = -1; private int freq; - private final Similarity.ExactDocScorer docScorer; + private final Similarity.ExactSimScorer docScorer; ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.ExactDocScorer docScorer) throws IOException { + Similarity.ExactSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; diff --git a/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java b/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java index 2a4080da74c..01aaa782b09 100644 --- a/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java +++ b/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java @@ -29,7 +29,7 @@ import org.apache.lucene.search.similarities.Similarity; final class MatchOnlyTermScorer extends Scorer { private final DocsEnum docsEnum; - private final Similarity.ExactDocScorer docScorer; + private final Similarity.ExactSimScorer docScorer; /** * Construct a TermScorer. 
@@ -39,10 +39,10 @@ final class MatchOnlyTermScorer extends Scorer { * @param td * An iterator over the documents matching the Term. * @param docScorer - * The Similarity.ExactDocScorer implementation + * The Similarity.ExactSimScorer implementation * to be used for score computations. */ - MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { + MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.docsEnum = td; diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index dc36958a60b..dafe4b1e6fa 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; @@ -137,7 +137,7 @@ public class MultiPhraseQuery extends Query { private class MultiPhraseWeight extends Weight { private final Similarity similarity; - private final Similarity.Stats stats; + private final Similarity.SimWeight stats; private final Map termContexts = new HashMap(); public MultiPhraseWeight(IndexSearcher searcher) @@ -157,8 +157,9 @@ public class MultiPhraseQuery extends Query { allTermStats.add(searcher.termStatistics(term, termContext)); } } - stats = similarity.computeStats(searcher.collectionStatistics(field), - getBoost(), allTermStats.toArray(new TermStatistics[allTermStats.size()])); + stats = 
similarity.computeWeight(getBoost(), + searcher.collectionStatistics(field), + allTermStats.toArray(new TermStatistics[allTermStats.size()])); } @Override @@ -246,14 +247,14 @@ public class MultiPhraseQuery extends Query { } if (slop == 0) { - ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context)); + ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context)); if (s.noDocs) { return null; } else { return s; } } else { - return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context)); + return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context)); } } @@ -264,7 +265,7 @@ public class MultiPhraseQuery extends Query { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index be407e301e5..33d5937542b 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import 
org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; @@ -183,7 +183,7 @@ public class PhraseQuery extends Query { private class PhraseWeight extends Weight { private final Similarity similarity; - private final Similarity.Stats stats; + private final Similarity.SimWeight stats; private transient TermContext states[]; public PhraseWeight(IndexSearcher searcher) @@ -197,7 +197,7 @@ public class PhraseQuery extends Query { states[i] = TermContext.build(context, term, true); termStats[i] = searcher.termStatistics(term, states[i]); } - stats = similarity.computeStats(searcher.collectionStatistics(field), getBoost(), termStats); + stats = similarity.computeWeight(getBoost(), searcher.collectionStatistics(field), termStats); } @Override @@ -258,7 +258,7 @@ public class PhraseQuery extends Query { } if (slop == 0) { // optimize exact case - ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactDocScorer(stats, field, context)); + ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context)); if (s.noDocs) { return null; } else { @@ -266,7 +266,7 @@ public class PhraseQuery extends Query { } } else { return - new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppyDocScorer(stats, field, context)); + new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context)); } } @@ -282,7 +282,7 @@ public class PhraseQuery extends Query { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, field, context); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation scoreExplanation = docScorer.explain(doc, new 
Explanation(freq, "phraseFreq=" + freq)); diff --git a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java index 3f2f6d8db96..d6aa6f568fe 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java @@ -36,10 +36,10 @@ abstract class PhraseScorer extends Scorer { private float freq; //phrase frequency in current doc as computed by phraseFreq(). - final Similarity.SloppyDocScorer docScorer; + final Similarity.SloppySimScorer docScorer; PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SloppyDocScorer docScorer) { + Similarity.SloppySimScorer docScorer) { super(weight); this.docScorer = docScorer; diff --git a/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index c123ef7c6cc..dbd5ca84d41 100644 --- a/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -30,7 +30,7 @@ final class SloppyPhraseScorer extends PhraseScorer { private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - int slop, Similarity.SloppyDocScorer docScorer) { + int slop, Similarity.SloppySimScorer docScorer) { super(weight, postings, docScorer); this.slop = slop; } diff --git a/lucene/src/java/org/apache/lucene/search/TermQuery.java b/lucene/src/java/org/apache/lucene/search/TermQuery.java index 0a1b5ccc936..a378a368d0f 100644 --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; -import 
org.apache.lucene.search.similarities.Similarity.ExactDocScorer; +import org.apache.lucene.search.similarities.Similarity.ExactSimScorer; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -45,7 +45,7 @@ public class TermQuery extends Query { final class TermWeight extends Weight { private final Similarity similarity; - private final Similarity.Stats stats; + private final Similarity.SimWeight stats; private final TermContext termStates; public TermWeight(IndexSearcher searcher, TermContext termStates) @@ -53,9 +53,9 @@ public class TermQuery extends Query { assert termStates != null : "TermContext must not be null"; this.termStates = termStates; this.similarity = searcher.getSimilarityProvider().get(term.field()); - this.stats = similarity.computeStats( - searcher.collectionStatistics(term.field()), + this.stats = similarity.computeWeight( getBoost(), + searcher.collectionStatistics(term.field()), searcher.termStatistics(term, termStates)); } @@ -95,10 +95,10 @@ public class TermQuery extends Query { } /** - * Creates an {@link ExactDocScorer} for this {@link TermWeight}*/ - ExactDocScorer createDocScorer(AtomicReaderContext context) + * Creates an {@link ExactSimScorer} for this {@link TermWeight}*/ + ExactSimScorer createDocScorer(AtomicReaderContext context) throws IOException { - return similarity.exactDocScorer(stats, term.field(), context); + return similarity.exactSimScorer(stats, context); } /** @@ -130,7 +130,7 @@ public class TermQuery extends Query { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context); + ExactSimScorer docScorer = similarity.exactSimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation 
scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq)); diff --git a/lucene/src/java/org/apache/lucene/search/TermScorer.java b/lucene/src/java/org/apache/lucene/search/TermScorer.java index 02f860f4333..e5f93d13daa 100644 --- a/lucene/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/src/java/org/apache/lucene/search/TermScorer.java @@ -26,7 +26,7 @@ import org.apache.lucene.search.similarities.Similarity; */ final class TermScorer extends Scorer { private final DocsEnum docsEnum; - private final Similarity.ExactDocScorer docScorer; + private final Similarity.ExactSimScorer docScorer; /** * Construct a TermScorer. @@ -36,10 +36,10 @@ final class TermScorer extends Scorer { * @param td * An iterator over the documents matching the Term. * @param docScorer - * The Similarity.ExactDocScorer implementation + * The Similarity.ExactSimScorer implementation * to be used for score computations. */ - TermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { + TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; this.docsEnum = td; diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index b132f92298e..beb1c4ab4c4 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -25,7 +25,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import org.apache.lucene.search.spans.NearSpansOrdered; import 
org.apache.lucene.search.spans.NearSpansUnordered; import org.apache.lucene.search.spans.SpanNearQuery; @@ -53,7 +53,7 @@ import java.util.Iterator; *

* Payload scores are aggregated using a pluggable {@link PayloadFunction}. * - * @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef) + * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef) */ public class PayloadNearQuery extends SpanNearQuery { protected String fieldName; @@ -151,7 +151,7 @@ public class PayloadNearQuery extends SpanNearQuery { public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, - similarity, similarity.sloppyDocScorer(stats, query.getField(), context)); + similarity, similarity.sloppySimScorer(stats, context)); } @Override @@ -161,7 +161,7 @@ public class PayloadNearQuery extends SpanNearQuery { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); Explanation expl = new Explanation(); expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); @@ -189,7 +189,7 @@ public class PayloadNearQuery extends SpanNearQuery { private int payloadsSeen; protected PayloadNearSpanScorer(Spans spans, Weight weight, - Similarity similarity, Similarity.SloppyDocScorer docScorer) throws IOException { + Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException { super(spans, weight, docScorer); this.spans = spans; } diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 14db69321b1..fdaf74adf7f 
100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -28,7 +28,7 @@ import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.payloads.PayloadNearQuery.PayloadNearSpanScorer; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import org.apache.lucene.search.spans.TermSpans; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanWeight; @@ -49,7 +49,7 @@ import java.io.IOException; * which returns 1 by default. *

* Payload scores are aggregated using a pluggable {@link PayloadFunction}. - * @see org.apache.lucene.search.similarities.Similarity.SloppyDocScorer#computePayloadFactor(int, int, int, BytesRef) + * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef) **/ public class PayloadTermQuery extends SpanTermQuery { protected PayloadFunction function; @@ -82,7 +82,7 @@ public class PayloadTermQuery extends SpanTermQuery { public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), - this, similarity.sloppyDocScorer(stats, query.getField(), context)); + this, similarity.sloppySimScorer(stats, context)); } protected class PayloadTermSpanScorer extends SpanScorer { @@ -91,7 +91,7 @@ public class PayloadTermQuery extends SpanTermQuery { protected int payloadsSeen; private final TermSpans termSpans; - public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppyDocScorer docScorer) throws IOException { + public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException { super(spans, weight, docScorer); termSpans = spans; } @@ -180,7 +180,7 @@ public class PayloadTermQuery extends SpanTermQuery { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); Explanation expl = new Explanation(); expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); diff --git a/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java 
b/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java index ab2f3e8a59d..a0a4a9f9938 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java @@ -153,7 +153,7 @@ public class BM25Similarity extends Similarity { } @Override - public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { + public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); float avgdl = avgFieldLength(collectionStats); @@ -163,23 +163,25 @@ public class BM25Similarity extends Similarity { for (int i = 0; i < cache.length; i++) { cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl); } - return new BM25Stats(idf, queryBoost, avgdl, cache); + return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache); } @Override - public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - final DocValues norms = context.reader().normValues(fieldName); + public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + BM25Stats bm25stats = (BM25Stats) stats; + final DocValues norms = context.reader().normValues(bm25stats.field); return norms == null - ? new ExactBM25DocScorerNoNorms((BM25Stats)stats) - : new ExactBM25DocScorer((BM25Stats)stats, norms); + ? 
new ExactBM25DocScorerNoNorms(bm25stats) + : new ExactBM25DocScorer(bm25stats, norms); } @Override - public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - return new SloppyBM25DocScorer((BM25Stats) stats, context.reader().normValues(fieldName)); + public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + BM25Stats bm25stats = (BM25Stats) stats; + return new SloppyBM25DocScorer(bm25stats, context.reader().normValues(bm25stats.field)); } - private class ExactBM25DocScorer extends ExactDocScorer { + private class ExactBM25DocScorer extends ExactSimScorer { private final BM25Stats stats; private final float weightValue; private final byte[] norms; @@ -205,7 +207,7 @@ public class BM25Similarity extends Similarity { } /** there are no norms, we act as if b=0 */ - private class ExactBM25DocScorerNoNorms extends ExactDocScorer { + private class ExactBM25DocScorerNoNorms extends ExactSimScorer { private final BM25Stats stats; private final float weightValue; private static final int SCORE_CACHE_SIZE = 32; @@ -232,7 +234,7 @@ public class BM25Similarity extends Similarity { } } - private class SloppyBM25DocScorer extends SloppyDocScorer { + private class SloppyBM25DocScorer extends SloppySimScorer { private final BM25Stats stats; private final float weightValue; // boost * idf * (k1 + 1) private final byte[] norms; @@ -269,7 +271,7 @@ public class BM25Similarity extends Similarity { } /** Collection statistics for the BM25 model. */ - private static class BM25Stats extends Stats { + private static class BM25Stats extends SimWeight { /** BM25's idf */ private final Explanation idf; /** The average document length. 
*/ @@ -280,10 +282,13 @@ public class BM25Similarity extends Similarity { private float topLevelBoost; /** weight (idf * boost) */ private float weight; + /** field name, for pulling norms */ + private final String field; /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */ private final float cache[]; - BM25Stats(Explanation idf, float queryBoost, float avgdl, float cache[]) { + BM25Stats(String field, Explanation idf, float queryBoost, float avgdl, float cache[]) { + this.field = field; this.idf = idf; this.queryBoost = queryBoost; this.avgdl = avgdl; diff --git a/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java b/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java index 7bde718d510..e438c266b3c 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java @@ -23,7 +23,8 @@ import org.apache.lucene.index.Terms; * Stores all statistics commonly used ranking methods. * @lucene.experimental */ -public class BasicStats extends Similarity.Stats { +public class BasicStats extends Similarity.SimWeight { + final String field; /** The number of documents. */ protected long numberOfDocuments; /** The total number of tokens in the field. */ @@ -47,7 +48,8 @@ public class BasicStats extends Similarity.Stats { protected float totalBoost; /** Constructor. Sets the query boost. 
*/ - public BasicStats(float queryBoost) { + public BasicStats(String field, float queryBoost) { + this.field = field; this.queryBoost = queryBoost; this.totalBoost = queryBoost; } diff --git a/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java b/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java index da78d6bb9ea..db7607e05d8 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java @@ -51,8 +51,8 @@ public abstract class LMSimilarity extends SimilarityBase { } @Override - protected BasicStats newStats(float queryBoost) { - return new LMStats(queryBoost); + protected BasicStats newStats(String field, float queryBoost) { + return new LMStats(field, queryBoost); } /** @@ -102,8 +102,8 @@ public abstract class LMSimilarity extends SimilarityBase { /** The probability that the current term is generated by the collection. */ private float collectionProbability; - public LMStats(float queryBoost) { - super(queryBoost); + public LMStats(String field, float queryBoost) { + super(field, queryBoost); } /** diff --git a/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java b/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java index e5d357c83ce..25a0448aadf 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java @@ -46,43 +46,43 @@ public class MultiSimilarity extends Similarity { } @Override - public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { - Stats subStats[] = new Stats[sims.length]; + public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... 
termStats) { + SimWeight subStats[] = new SimWeight[sims.length]; for (int i = 0; i < subStats.length; i++) { - subStats[i] = sims[i].computeStats(collectionStats, queryBoost, termStats); + subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats); } return new MultiStats(subStats); } @Override - public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - ExactDocScorer subScorers[] = new ExactDocScorer[sims.length]; + public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + ExactSimScorer subScorers[] = new ExactSimScorer[sims.length]; for (int i = 0; i < subScorers.length; i++) { - subScorers[i] = sims[i].exactDocScorer(((MultiStats)stats).subStats[i], fieldName, context); + subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context); } return new MultiExactDocScorer(subScorers); } @Override - public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - SloppyDocScorer subScorers[] = new SloppyDocScorer[sims.length]; + public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + SloppySimScorer subScorers[] = new SloppySimScorer[sims.length]; for (int i = 0; i < subScorers.length; i++) { - subScorers[i] = sims[i].sloppyDocScorer(((MultiStats)stats).subStats[i], fieldName, context); + subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context); } return new MultiSloppyDocScorer(subScorers); } - public static class MultiExactDocScorer extends ExactDocScorer { - private final ExactDocScorer subScorers[]; + public static class MultiExactDocScorer extends ExactSimScorer { + private final ExactSimScorer subScorers[]; - MultiExactDocScorer(ExactDocScorer subScorers[]) { + MultiExactDocScorer(ExactSimScorer subScorers[]) { this.subScorers = subScorers; } @Override public float score(int doc, int 
freq) { float sum = 0.0f; - for (ExactDocScorer subScorer : subScorers) { + for (ExactSimScorer subScorer : subScorers) { sum += subScorer.score(doc, freq); } return sum; @@ -91,24 +91,24 @@ public class MultiSimilarity extends Similarity { @Override public Explanation explain(int doc, Explanation freq) { Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:"); - for (ExactDocScorer subScorer : subScorers) { + for (ExactSimScorer subScorer : subScorers) { expl.addDetail(subScorer.explain(doc, freq)); } return expl; } } - public static class MultiSloppyDocScorer extends SloppyDocScorer { - private final SloppyDocScorer subScorers[]; + public static class MultiSloppyDocScorer extends SloppySimScorer { + private final SloppySimScorer subScorers[]; - MultiSloppyDocScorer(SloppyDocScorer subScorers[]) { + MultiSloppyDocScorer(SloppySimScorer subScorers[]) { this.subScorers = subScorers; } @Override public float score(int doc, float freq) { float sum = 0.0f; - for (SloppyDocScorer subScorer : subScorers) { + for (SloppySimScorer subScorer : subScorers) { sum += subScorer.score(doc, freq); } return sum; @@ -117,7 +117,7 @@ public class MultiSimilarity extends Similarity { @Override public Explanation explain(int doc, Explanation freq) { Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:"); - for (SloppyDocScorer subScorer : subScorers) { + for (SloppySimScorer subScorer : subScorers) { expl.addDetail(subScorer.explain(doc, freq)); } return expl; @@ -134,17 +134,17 @@ public class MultiSimilarity extends Similarity { } } - public static class MultiStats extends Stats { - final Stats subStats[]; + public static class MultiStats extends SimWeight { + final SimWeight subStats[]; - MultiStats(Stats subStats[]) { + MultiStats(SimWeight subStats[]) { this.subStats = subStats; } @Override public float getValueForNormalization() { float sum = 0.0f; - for (Stats stat : subStats) { + for (SimWeight stat : subStats) { sum += 
stat.getValueForNormalization(); } return sum / subStats.length; @@ -152,7 +152,7 @@ public class MultiSimilarity extends Similarity { @Override public void normalize(float queryNorm, float topLevelBoost) { - for (Stats stat : subStats) { + for (SimWeight stat : subStats) { stat.normalize(queryNorm, topLevelBoost); } } diff --git a/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java index 96603107bca..effe1fc4a7d 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java @@ -17,16 +17,13 @@ package org.apache.lucene.search.similarities; * limitations under the License. */ - import java.io.IOException; import org.apache.lucene.document.DocValuesField; // javadoc import org.apache.lucene.index.AtomicReader; // javadoc import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.IndexReader; // javadoc import org.apache.lucene.index.Norm; -import org.apache.lucene.index.Terms; // javadoc import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; @@ -39,7 +36,6 @@ import org.apache.lucene.search.spans.SpanQuery; // javadoc import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.SmallFloat; // javadoc - /** * Similarity defines the components of Lucene scoring. *

@@ -59,21 +55,21 @@ import org.apache.lucene.util.SmallFloat; // javadoc * At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing * the Similarity implementation to set a per-document value for the field that will * be later accessible via {@link AtomicReader#normValues(String)}. Lucene makes no assumption - * about what is in this byte, but it is most useful for encoding length normalization + * about what is in this norm, but it is most useful for encoding length normalization * information. *

- * Implementations should carefully consider how the normalization byte is encoded: while + * Implementations should carefully consider how the normalization is encoded: while * Lucene's classical {@link TFIDFSimilarity} encodes a combination of index-time boost - * and length normalization information with {@link SmallFloat}, this might not be suitable - * for all purposes. + * and length normalization information with {@link SmallFloat} into a single byte, this + * might not be suitable for all purposes. *

* Many formulas require the use of average document length, which can be computed via a - * combination of {@link Terms#getSumTotalTermFreq()} and {@link IndexReader#maxDoc()}, + * combination of {@link CollectionStatistics#sumTotalTermFreq()} and + * {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()}, + * depending upon whether the average should reflect field sparsity. *

- * Because index-time boost is handled entirely at the application level anyway, - * an application can alternatively store the index-time boost separately using an - * {@link DocValuesField}, and access this at query-time with - * {@link AtomicReader#docValues(String)}. + * Additional scoring factors can be stored in named {@link DocValuesField}s, and accessed + * at query-time with {@link AtomicReader#docValues(String)}. *

* Finally, using index-time boosts (either via folding into the normalization byte or * via DocValues), is an inefficient way to boost the scores of different fields if the @@ -84,19 +80,19 @@ import org.apache.lucene.util.SmallFloat; // javadoc * * At query-time, Queries interact with the Similarity via these steps: *

<ol>
- *   <li>The {@link #computeStats(CollectionStatistics, float, TermStatistics...)} method is called a single time,
+ *   <li>The {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} method is called a single time,
  *       allowing the implementation to compute any statistics (such as IDF, average document length, etc)
- *       across the entire collection. The {@link TermStatistics} passed in already contain
- *       the raw statistics involved, so a Similarity can freely use any combination
- *       of term statistics without causing any additional I/O. Lucene makes no assumption about what is
- *       stored in the returned {@link Similarity.Stats} object.
- *   <li>The query normalization process occurs a single time: {@link Similarity.Stats#getValueForNormalization()}
+ *       across the entire collection. The {@link TermStatistics} and {@link CollectionStatistics} passed in
+ *       already contain all of the raw statistics involved, so a Similarity can freely use any combination
+ *       of statistics without causing any additional I/O. Lucene makes no assumption about what is
+ *       stored in the returned {@link Similarity.SimWeight} object.
+ *   <li>The query normalization process occurs a single time: {@link Similarity.SimWeight#getValueForNormalization()}
  *       is called for each query leaf node, {@link SimilarityProvider#queryNorm(float)} is called for the top-level
- *       query, and finally {@link Similarity.Stats#normalize(float, float)} passes down the normalization value
+ *       query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
  *       and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
- *   <li>For each segment in the index, the Query creates a {@link #exactDocScorer(Stats, String, AtomicReaderContext)}
+ *   <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
  *       (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
- *       {@link #sloppyDocScorer(Stats, String, AtomicReaderContext)} (for queries with sloppy frequencies such as
+ *       {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
  *       SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
  * </ol>

@@ -130,27 +126,40 @@ public abstract class Similarity { public abstract void computeNorm(FieldInvertState state, Norm norm); /** - * Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query. + * Compute any collection-level weight (e.g. IDF, average document length, etc) needed for scoring a query. + * + * @param queryBoost the query-time boost. + * @param collectionStats collection-level statistics, such as the number of tokens in the collection. + * @param termStats term-level statistics, such as the document frequency of a term across the collection. + * @return SimWeight object with the information this Similarity needs to score a query. */ - public abstract Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats); + public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats); /** - * returns a new {@link Similarity.ExactDocScorer}. + * Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index. + * @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} + * @param context segment of the inverted index to be scored. + * @return ExactSimScorer for scoring documents across context + * @throws IOException */ - public abstract ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException; + public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException; /** - * returns a new {@link Similarity.SloppyDocScorer}. + * Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index. + * @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)} + * @param context segment of the inverted index to be scored. 
+ * @return SloppySimScorer for scoring documents across context + * @throws IOException */ - public abstract SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException; + public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException; /** * API for scoring exact queries such as {@link TermQuery} and * exact {@link PhraseQuery}. *

- * Term frequencies are integers (the term or phrase's tf) + * Frequencies are integers (the term or phrase frequency within the document) */ - public static abstract class ExactDocScorer { + public static abstract class ExactSimScorer { /** * Score a single document * @param doc document id @@ -177,12 +186,14 @@ public abstract class Similarity { * API for scoring "sloppy" queries such as {@link SpanQuery} and * sloppy {@link PhraseQuery}. *

- * Term frequencies are floating point values. + * Frequencies are floating-point values: an approximate + * within-document frequency adjusted for "sloppiness" by + * {@link SloppySimScorer#computeSlopFactor(int)}. */ - public static abstract class SloppyDocScorer { + public static abstract class SloppySimScorer { /** * Score a single document - * @param doc document id + * @param doc document id within the inverted index segment * @param freq sloppy term frequency * @return document's score */ @@ -196,7 +207,7 @@ public abstract class Similarity { /** * Explain the score for a single document - * @param doc document id + * @param doc document id within the inverted index segment * @param freq Explanation of how the sloppy term frequency was computed * @return document's score */ @@ -208,12 +219,12 @@ public abstract class Similarity { } } - /** Stores the statistics for the indexed collection. This abstract + /** Stores the weight for a query across the indexed collection. This abstract * implementation is empty; descendants of {@code Similarity} should - * subclass {@code Stats} and define the statistics they require in the + * subclass {@code SimWeight} and define the statistics they require in the * subclass. Examples include idf, average field length, etc. */ - public static abstract class Stats { + public static abstract class SimWeight { /** The value for normalization of contained query clauses (e.g. sum of squared weights). *

diff --git a/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java b/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java index 9e9ddc7e068..a2496847b4e 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java @@ -70,18 +70,18 @@ public abstract class SimilarityBase extends Similarity { } @Override - public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { + public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { BasicStats stats[] = new BasicStats[termStats.length]; for (int i = 0; i < termStats.length; i++) { - stats[i] = newStats(queryBoost); + stats[i] = newStats(collectionStats.field(), queryBoost); fillBasicStats(stats[i], collectionStats, termStats[i]); } return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats); } /** Factory method to return a custom stats object */ - protected BasicStats newStats(float queryBoost) { - return new BasicStats(queryBoost); + protected BasicStats newStats(String field, float queryBoost) { + return new BasicStats(field, queryBoost); } /** Fills all member fields defined in {@code BasicStats} in {@code stats}. @@ -179,40 +179,38 @@ public abstract class SimilarityBase extends Similarity { } @Override - public ExactDocScorer exactDocScorer(Stats stats, String fieldName, - AtomicReaderContext context) throws IOException { - DocValues norms = context.reader().normValues(fieldName); - + public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { if (stats instanceof MultiSimilarity.MultiStats) { // a multi term query (e.g. phrase). 
return the summation, // scoring almost as if it were boolean query - Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; - ExactDocScorer subScorers[] = new ExactDocScorer[subStats.length]; + SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; + ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length]; for (int i = 0; i < subScorers.length; i++) { - subScorers[i] = new BasicExactDocScorer((BasicStats)subStats[i], norms); + BasicStats basicstats = (BasicStats) subStats[i]; + subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field)); } return new MultiSimilarity.MultiExactDocScorer(subScorers); } else { - return new BasicExactDocScorer((BasicStats) stats, norms); + BasicStats basicstats = (BasicStats) stats; + return new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field)); } } @Override - public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, - AtomicReaderContext context) throws IOException { - DocValues norms = context.reader().normValues(fieldName); - + public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { if (stats instanceof MultiSimilarity.MultiStats) { // a multi term query (e.g. phrase). 
return the summation, // scoring almost as if it were boolean query - Stats subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; - SloppyDocScorer subScorers[] = new SloppyDocScorer[subStats.length]; + SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats; + SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length]; for (int i = 0; i < subScorers.length; i++) { - subScorers[i] = new BasicSloppyDocScorer((BasicStats)subStats[i], norms); + BasicStats basicstats = (BasicStats) subStats[i]; + subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field)); } return new MultiSimilarity.MultiSloppyDocScorer(subScorers); } else { - return new BasicSloppyDocScorer((BasicStats) stats, norms); + BasicStats basicstats = (BasicStats) stats; + return new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field)); } } @@ -274,7 +272,7 @@ public abstract class SimilarityBase extends Similarity { * {@link SimilarityBase#explain(BasicStats, int, Explanation, int)}, * respectively. */ - private class BasicExactDocScorer extends ExactDocScorer { + private class BasicExactDocScorer extends ExactSimScorer { private final BasicStats stats; private final byte[] norms; @@ -303,7 +301,7 @@ public abstract class SimilarityBase extends Similarity { * {@link SimilarityBase#explain(BasicStats, int, Explanation, int)}, * respectively. 
*/ - private class BasicSloppyDocScorer extends SloppyDocScorer { + private class BasicSloppyDocScorer extends SloppySimScorer { private final BasicStats stats; private final byte[] norms; diff --git a/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java index 95b4167a5dd..a7e91ceb4d5 100644 --- a/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java @@ -694,26 +694,28 @@ public abstract class TFIDFSimilarity extends Similarity { public abstract float scorePayload(int doc, int start, int end, BytesRef payload); @Override - public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { + public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { final Explanation idf = termStats.length == 1 ? 
idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); - return new IDFStats(idf, queryBoost); + return new IDFStats(collectionStats.field(), idf, queryBoost); } @Override - public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - return new ExactTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName)); + public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + IDFStats idfstats = (IDFStats) stats; + return new ExactTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field)); } @Override - public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader().normValues(fieldName)); + public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + IDFStats idfstats = (IDFStats) stats; + return new SloppyTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field)); } // TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot. - private final class ExactTFIDFDocScorer extends ExactDocScorer { + private final class ExactTFIDFDocScorer extends ExactSimScorer { private final IDFStats stats; private final float weightValue; private final byte[] norms; @@ -744,7 +746,7 @@ public abstract class TFIDFSimilarity extends Similarity { } } - private final class SloppyTFIDFDocScorer extends SloppyDocScorer { + private final class SloppyTFIDFDocScorer extends SloppySimScorer { private final IDFStats stats; private final float weightValue; private final byte[] norms; @@ -780,7 +782,8 @@ public abstract class TFIDFSimilarity extends Similarity { /** Collection statistics for the TF-IDF model. The only statistic of interest * to this model is idf. 
*/ - private static class IDFStats extends Stats { + private static class IDFStats extends SimWeight { + private final String field; /** The idf and its explanation */ private final Explanation idf; private float queryNorm; @@ -788,8 +791,9 @@ public abstract class TFIDFSimilarity extends Similarity { private final float queryBoost; private float value; - public IDFStats(Explanation idf, float queryBoost) { + public IDFStats(String field, Explanation idf, float queryBoost) { // TODO: Validate? + this.field = field; this.idf = idf; this.queryBoost = queryBoost; this.queryWeight = idf.getValue() * queryBoost; // compute query weight diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java index d97307a150f..28e61ef9297 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -33,9 +33,9 @@ public class SpanScorer extends Scorer { protected int doc; protected float freq; - protected final Similarity.SloppyDocScorer docScorer; + protected final Similarity.SloppySimScorer docScorer; - protected SpanScorer(Spans spans, Weight weight, Similarity.SloppyDocScorer docScorer) + protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java index b987505f7cf..98272cf278d 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -22,7 +22,7 @@ import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; 
+import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.TermContext; @@ -38,7 +38,7 @@ public class SpanWeight extends Weight { protected Similarity similarity; protected Map termContexts; protected SpanQuery query; - protected Similarity.Stats stats; + protected Similarity.SimWeight stats; public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { @@ -57,9 +57,8 @@ public class SpanWeight extends Weight { termContexts.put(term, state); i++; } - stats = similarity.computeStats( + stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(query.getField()), - query.getBoost(), termStats); } @@ -79,7 +78,7 @@ public class SpanWeight extends Weight { @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context)); + return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context)); } @Override @@ -89,7 +88,7 @@ public class SpanWeight extends Weight { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); - SloppyDocScorer docScorer = similarity.sloppyDocScorer(stats, query.getField(), context); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); diff --git a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java index 75673c562cd..bd3ba7fc911 100644 --- 
a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -191,7 +191,7 @@ final class JustCompileSearch { static final class JustCompilePhraseScorer extends PhraseScorer { JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SloppyDocScorer docScorer) throws IOException { + Similarity.SloppySimScorer docScorer) throws IOException { super(weight, postings, docScorer); } @@ -247,17 +247,17 @@ final class JustCompileSearch { static final class JustCompileSimilarity extends Similarity { @Override - public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) { + public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { + public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { + public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java index 823d85fddd8..0184649eb01 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java @@ -158,16 +158,16 @@ public class TestDocValuesScoring extends LuceneTestCase { } @Override - public Stats computeStats(CollectionStatistics 
collectionStats, float queryBoost, TermStatistics... termStats) { - return sim.computeStats(collectionStats, queryBoost, termStats); + public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return sim.computeWeight(queryBoost, collectionStats, termStats); } @Override - public ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - final ExactDocScorer sub = sim.exactDocScorer(stats, fieldName, context); + public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + final ExactSimScorer sub = sim.exactSimScorer(stats, context); final Source values = context.reader().docValues(boostField).getSource(); - return new ExactDocScorer() { + return new ExactSimScorer() { @Override public float score(int doc, int freq) { return (float) values.getFloat(doc) * sub.score(doc, freq); @@ -186,11 +186,11 @@ public class TestDocValuesScoring extends LuceneTestCase { } @Override - public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException { - final SloppyDocScorer sub = sim.sloppyDocScorer(stats, fieldName, context); + public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException { + final SloppySimScorer sub = sim.sloppySimScorer(stats, context); final Source values = context.reader().docValues(boostField).getSource(); - return new SloppyDocScorer() { + return new SloppySimScorer() { @Override public float score(int doc, float freq) { return (float) values.getFloat(doc) * sub.score(doc, freq); diff --git a/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java index 09d7b0bbd81..27bebbe9760 100644 --- a/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java +++ 
b/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java @@ -23,11 +23,9 @@ import java.util.List; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.CollectionStatistics; @@ -40,8 +38,6 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.TermContext; -import org.junit.Ignore; /** * Tests the {@link SimilarityBase}-based Similarities. Contains unit tests and @@ -167,7 +163,7 @@ public class TestSimilarityBase extends LuceneTestCase { /** Creates the default statistics object that the specific tests modify. 
*/ private BasicStats createStats() { - BasicStats stats = new BasicStats(1); + BasicStats stats = new BasicStats("spoof", 1); stats.setNumberOfDocuments(NUMBER_OF_DOCUMENTS); stats.setNumberOfFieldTokens(NUMBER_OF_FIELD_TOKENS); stats.setAvgFieldLength(AVG_FIELD_LENGTH); @@ -177,7 +173,7 @@ public class TestSimilarityBase extends LuceneTestCase { } private CollectionStatistics toCollectionStats(BasicStats stats) { - return new CollectionStatistics("spoof", stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1); + return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1); } private TermStatistics toTermStats(BasicStats stats) { @@ -192,8 +188,8 @@ public class TestSimilarityBase extends LuceneTestCase { private void unitTestCore(BasicStats stats, float freq, int docLen) throws IOException { for (SimilarityBase sim : sims) { - BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats), - stats.getTotalBoost(), + BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(), + toCollectionStats(stats), toTermStats(stats)); float score = sim.score(realStats, freq, docLen); float explScore = sim.explain( @@ -525,8 +521,8 @@ public class TestSimilarityBase extends LuceneTestCase { private void correctnessTestCore(SimilarityBase sim, float gold) throws IOException { BasicStats stats = createStats(); - BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats), - stats.getTotalBoost(), + BasicStats realStats = (BasicStats) sim.computeWeight(stats.getTotalBoost(), + toCollectionStats(stats), toTermStats(stats)); float score = sim.score(realStats, FREQ, DOC_LEN); assertEquals( diff --git a/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 2f249e502d0..d33ae3e3a23 100644 --- 
a/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -139,7 +139,7 @@ final class JustCompileSearchSpans { static final class JustCompileSpanScorer extends SpanScorer { protected JustCompileSpanScorer(Spans spans, Weight weight, - Similarity.SloppyDocScorer docScorer) throws IOException { + Similarity.SloppySimScorer docScorer) throws IOException { super(spans, weight, docScorer); }