LUCENE-8313: Simplify SimScorer.

This commit is contained in:
Adrien Grand 2018-05-16 17:21:58 +02:00
parent bd20cb3c87
commit 9b9776a714
26 changed files with 64 additions and 116 deletions

View File

@ -209,7 +209,7 @@ public final class FeatureField extends Field {
}
static abstract class FeatureFunction {
abstract SimScorer scorer(String field, float w);
abstract SimScorer scorer(float w);
abstract Explanation explain(String field, String feature, float w, int freq);
FeatureFunction rewrite(IndexReader reader) throws IOException { return this; }
}
@ -242,8 +242,8 @@ public final class FeatureField extends Field {
}
@Override
SimScorer scorer(String field, float weight) {
return new SimScorer(field) {
SimScorer scorer(float weight) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
return (float) (weight * Math.log(scalingFactor + decodeFeatureValue(freq)));
@ -254,7 +254,7 @@ public final class FeatureField extends Field {
@Override
Explanation explain(String field, String feature, float w, int freq) {
float featureValue = decodeFeatureValue(freq);
float score = scorer(field, w).score(freq, 1L);
float score = scorer(w).score(freq, 1L);
return Explanation.match(score,
"Log function on the " + field + " field for the " + feature + " feature, computed as w * log(a + S) from:",
Explanation.match(w, "w, weight of this function"),
@ -305,12 +305,12 @@ public final class FeatureField extends Field {
}
@Override
SimScorer scorer(String field, float weight) {
SimScorer scorer(float weight) {
if (pivot == null) {
throw new IllegalStateException("Rewrite first");
}
final float pivot = this.pivot; // unbox
return new SimScorer(field) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
float f = decodeFeatureValue(freq);
@ -325,7 +325,7 @@ public final class FeatureField extends Field {
@Override
Explanation explain(String field, String feature, float weight, int freq) {
float featureValue = decodeFeatureValue(freq);
float score = scorer(field, weight).score(freq, 1L);
float score = scorer(weight).score(freq, 1L);
return Explanation.match(score,
"Saturation function on the " + field + " field for the " + feature + " feature, computed as w * S / (S + k) from:",
Explanation.match(weight, "w, weight of this function"),
@ -368,8 +368,8 @@ public final class FeatureField extends Field {
}
@Override
SimScorer scorer(String field, float weight) {
return new SimScorer(field) {
SimScorer scorer(float weight) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
float f = decodeFeatureValue(freq);
@ -384,7 +384,7 @@ public final class FeatureField extends Field {
@Override
Explanation explain(String field, String feature, float weight, int freq) {
float featureValue = decodeFeatureValue(freq);
float score = scorer(field, weight).score(freq, 1L);
float score = scorer(weight).score(freq, 1L);
return Explanation.match(score,
"Sigmoid function on the " + field + " field for the " + feature + " feature, computed as w * S^a / (S^a + k^a) from:",
Explanation.match(weight, "w, weight of this function"),

View File

@ -133,7 +133,7 @@ final class FeatureQuery extends Query {
return null;
}
SimScorer scorer = function.scorer(fieldName, boost);
SimScorer scorer = function.scorer(boost);
ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);

View File

@ -17,6 +17,7 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
@ -29,15 +30,13 @@ public final class LeafSimScorer {
private final SimScorer scorer;
private final NumericDocValues norms;
private final float maxScore;
/**
* Sole constructor: Score documents of {@code reader} with {@code scorer}.
*/
public LeafSimScorer(SimScorer scorer, LeafReader reader, boolean needsScores, float maxFreq) throws IOException {
this.scorer = scorer;
norms = needsScores ? reader.getNormValues(scorer.getField()) : null;
maxScore = needsScores ? scorer.score(maxFreq, 1) : Float.MAX_VALUE;
public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores) throws IOException {
this.scorer = Objects.requireNonNull(scorer);
norms = needsScores ? reader.getNormValues(field) : null;
}
/** Return the wrapped {@link SimScorer}. */
@ -69,10 +68,4 @@ public final class LeafSimScorer {
return scorer.explain(freqExpl, getNormValue(doc));
}
/**
* Return an upper bound of the score.
* <p>
* This is a plain accessor for the {@code maxScore} field and performs no
* computation of its own. The field is final and assigned in the constructor:
* {@code scorer.score(maxFreq, 1)} when scores are needed, and
* {@code Float.MAX_VALUE} otherwise.
*
* @return the upper bound stored at construction time
*/
public float maxScore() {
return maxScore;
}
}

View File

@ -89,7 +89,8 @@ class PhraseScorer extends Scorer {
@Override
public float getMaxScore(int upTo) throws IOException {
return simScorer.maxScore();
// TODO: merge impacts of all clauses to get better score upper bounds
return simScorer.getSimScorer().score(Integer.MAX_VALUE, 1L);
}
@Override

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
abstract class PhraseWeight extends Weight {
@ -34,7 +35,16 @@ abstract class PhraseWeight extends Weight {
this.scoreMode = scoreMode;
this.field = field;
this.similarity = searcher.getSimilarity();
this.stats = getStats(searcher);
SimScorer stats = getStats(searcher);
if (stats == null) { // Means no terms or scores are not needed
stats = new SimScorer() {
@Override
public float score(float freq, long norm) {
return 1;
}
};
}
this.stats = stats;
}
protected abstract Similarity.SimScorer getStats(IndexSearcher searcher) throws IOException;
@ -46,7 +56,7 @@ abstract class PhraseWeight extends Weight {
PhraseMatcher matcher = getPhraseMatcher(context, false);
if (matcher == null)
return null;
LeafSimScorer simScorer = new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Integer.MAX_VALUE);
LeafSimScorer simScorer = new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
return new PhraseScorer(this, matcher, scoreMode, simScorer);
}
@ -64,7 +74,7 @@ abstract class PhraseWeight extends Weight {
while (matcher.nextMatch()) {
freq += matcher.sloppyWeight();
}
LeafSimScorer docScorer = new LeafSimScorer(stats, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE);
LeafSimScorer docScorer = new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
return Explanation.match(

View File

@ -25,8 +25,6 @@ import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -184,7 +182,7 @@ public final class SynonymQuery extends Query {
assert scorer instanceof TermScorer;
freq = ((TermScorer)scorer).freq();
}
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), true, Float.MAX_VALUE);
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
return Explanation.match(
@ -199,26 +197,14 @@ public final class SynonymQuery extends Query {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
IndexOptions indexOptions = IndexOptions.NONE;
if (terms.length > 0) {
FieldInfo info = context.reader()
.getFieldInfos()
.fieldInfo(terms[0].field());
if (info != null) {
indexOptions = info.getIndexOptions();
}
}
// we use termscorers + disjunction as an impl detail
List<Scorer> subScorers = new ArrayList<>();
long totalMaxFreq = 0;
for (int i = 0; i < terms.length; i++) {
TermState state = termStates[i].get(context);
if (state != null) {
TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
termsEnum.seekExact(terms[i].bytes(), state);
long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
totalMaxFreq += termMaxFreq;
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq);
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
}
}
@ -228,7 +214,7 @@ public final class SynonymQuery extends Query {
// we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs
return subScorers.get(0);
} else {
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, totalMaxFreq);
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
return new SynonymScorer(simScorer, this, subScorers);
}
}
@ -240,17 +226,6 @@ public final class SynonymQuery extends Query {
}
/**
* Compute an upper bound on the frequency of a term within a single document,
* using only term-level statistics.
*
* @param indexOptions index options of the field; the caller reads them from the
*                     field's {@code FieldInfo} and falls back to
*                     {@code IndexOptions.NONE} when no field info is found
* @param ttf value the caller obtains from {@code termsEnum.totalTermFreq()}
* @param df value the caller obtains from {@code termsEnum.docFreq()}
* @return {@code 1} when {@code indexOptions} is {@code DOCS} or any option ordered
*         before it; otherwise {@code ttf - df + 1}, capped at {@code Integer.MAX_VALUE}
*/
private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) {
// TODO: store the max term freq?
if (indexOptions.compareTo(IndexOptions.DOCS) <= 0) {
// omitTFAP field, tf values are implicitly 1.
return 1;
} else {
assert ttf >= 0;
// Presumably each of the df matching documents holds the term at least once, so a
// single document can hold at most ttf - (df - 1) occurrences -- TODO confirm.
// The result is clamped so that it never exceeds Integer.MAX_VALUE.
return Math.min(Integer.MAX_VALUE, ttf - df + 1);
}
}
static class SynonymScorer extends DisjunctionScorer {
private final LeafSimScorer similarity;
@ -266,7 +241,8 @@ public final class SynonymQuery extends Query {
@Override
public float getMaxScore(int upTo) throws IOException {
return similarity.maxScore();
// TODO: merge impacts to get better score upper bounds
return similarity.getSimScorer().score(Float.MAX_VALUE, 1L);
}
/** combines TF of all subs. */

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -111,26 +110,10 @@ public class TermQuery extends Query {
if (termsEnum == null) {
return null;
}
IndexOptions indexOptions = context.reader()
.getFieldInfos()
.fieldInfo(getTerm().field())
.getIndexOptions();
float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq);
LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
return new TermScorer(this, termsEnum, scoreMode, scorer);
}
/**
* Compute an upper bound on the frequency of a term within a single document,
* using only term-level statistics.
*
* @param indexOptions index options of the term's field, which the caller reads from
*                     the reader's field infos
* @param ttf value the caller obtains from {@code termsEnum.totalTermFreq()}
* @param df value the caller obtains from {@code termsEnum.docFreq()}
* @return {@code 1} when {@code indexOptions} is {@code DOCS} or any option ordered
*         before it; otherwise {@code ttf - df + 1}, capped at {@code Integer.MAX_VALUE}
*/
private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) {
// TODO: store the max term freq?
if (indexOptions.compareTo(IndexOptions.DOCS) <= 0) {
// omitTFAP field, tf values are implicitly 1.
return 1;
} else {
assert ttf >= 0;
// Presumably each of the df matching documents holds the term at least once, so a
// single document can hold at most ttf - (df - 1) occurrences -- TODO confirm.
// The result is clamped so that it never exceeds Integer.MAX_VALUE.
return Math.min(Integer.MAX_VALUE, ttf - df + 1);
}
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
@ -168,7 +151,7 @@ public class TermQuery extends Query {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), true, Integer.MAX_VALUE);
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), term.field(), true);
Explanation freqExplanation = Explanation.match(freq, "freq, occurrences of term within document");
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
return Explanation.match(

View File

@ -189,7 +189,7 @@ public class BM25Similarity extends Similarity {
for (int i = 0; i < cache.length; i++) {
cache[i] = k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl);
}
return new BM25Scorer(collectionStats.field(), boost, k1, b, idf, avgdl, cache);
return new BM25Scorer(boost, k1, b, idf, avgdl, cache);
}
/** Collection statistics for the BM25 model. */
@ -209,8 +209,7 @@ public class BM25Similarity extends Similarity {
/** weight (idf * boost) */
private final float weight;
BM25Scorer(String field, float boost, float k1, float b, Explanation idf, float avgdl, float[] cache) {
super(field);
BM25Scorer(float boost, float k1, float b, Explanation idf, float avgdl, float[] cache) {
this.boost = boost;
this.idf = idf;
this.avgdl = avgdl;

View File

@ -45,14 +45,13 @@ public class BooleanSimilarity extends Similarity {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new BooleanWeight(collectionStats.field(), boost);
return new BooleanWeight(boost);
}
private static class BooleanWeight extends SimScorer {
final float boost;
BooleanWeight(String field, float boost) {
super(field);
BooleanWeight(float boost) {
this.boost = boost;
}

View File

@ -52,14 +52,13 @@ public class MultiSimilarity extends Similarity {
for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].scorer(boost, collectionStats, termStats);
}
return new MultiSimScorer(collectionStats.field(), subScorers);
return new MultiSimScorer(subScorers);
}
static class MultiSimScorer extends SimScorer {
private final SimScorer subScorers[];
MultiSimScorer(String field, SimScorer subScorers[]) {
super(field);
MultiSimScorer(SimScorer subScorers[]) {
this.subScorers = subScorers;
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search.similarities;
import java.util.Collections;
import java.util.Objects;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.FieldInvertState;
@ -141,20 +140,11 @@ public abstract class Similarity {
*/
public static abstract class SimScorer {
private final String field;
/**
* Sole constructor. (For invocation by subclass
* constructors.)
*/
public SimScorer(String field) {
this.field = Objects.requireNonNull(field);
}
/** Return the field that this {@link SimScorer} operates on. */
public final String getField() {
return field;
}
protected SimScorer() {}
/**
* Score a single document. {@code freq} is the document-term sloppy

View File

@ -90,7 +90,7 @@ public abstract class SimilarityBase extends Similarity {
if (weights.length == 1) {
return weights[0];
} else {
return new MultiSimilarity.MultiSimScorer(collectionStats.field(), weights);
return new MultiSimilarity.MultiSimScorer(weights);
}
}
@ -216,7 +216,6 @@ public abstract class SimilarityBase extends Similarity {
final BasicStats stats;
BasicSimScorer(BasicStats stats) {
super(stats.field);
this.stats = stats;
}

View File

@ -523,7 +523,7 @@ public abstract class TFIDFSimilarity extends Similarity {
normTable[i] = norm;
}
normTable[0] = 1f / normTable[255];
return new TFIDFScorer(collectionStats.field(), boost, idf, normTable);
return new TFIDFScorer(boost, idf, normTable);
}
@ -536,8 +536,7 @@ public abstract class TFIDFSimilarity extends Similarity {
private final float queryWeight;
final float[] normTable;
public TFIDFScorer(String field, float boost, Explanation idf, float[] normTable) {
super(field);
public TFIDFScorer(float boost, Explanation idf, float[] normTable) {
// TODO: Validate?
this.idf = idf;
this.boost = boost;

View File

@ -140,7 +140,7 @@ public abstract class SpanWeight extends Weight {
* @throws IOException on error
*/
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), true, Float.MAX_VALUE);
return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
}
@Override
@ -150,7 +150,7 @@ public abstract class SpanWeight extends Weight {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), true, Float.MAX_VALUE);
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
return Explanation.match(scoreExplanation.getValue(),

View File

@ -211,15 +211,15 @@ public class TestFeatureField extends LuceneTestCase {
}
public void testLogSimScorer() {
doTestSimScorer(new FeatureField.LogFunction(4.5f).scorer("foo", 3f));
doTestSimScorer(new FeatureField.LogFunction(4.5f).scorer(3f));
}
public void testSatuSimScorer() {
doTestSimScorer(new FeatureField.SaturationFunction("foo", "bar", 20f).scorer("foo", 3f));
doTestSimScorer(new FeatureField.SaturationFunction("foo", "bar", 20f).scorer(3f));
}
public void testSigmSimScorer() {
doTestSimScorer(new FeatureField.SigmoidFunction(20f, 0.6f).scorer("foo", 3f));
doTestSimScorer(new FeatureField.SigmoidFunction(20f, 0.6f).scorer(3f));
}
private void doTestSimScorer(SimScorer s) {

View File

@ -109,7 +109,7 @@ public class TestMaxTermFrequency extends LuceneTestCase {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {

View File

@ -329,7 +329,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
return freq;

View File

@ -101,7 +101,7 @@ public class TestConjunctions extends LuceneTestCase {
@Override
public SimScorer scorer(float boost,
CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
return freq;

View File

@ -333,7 +333,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
SimScorer w = weight.similarity.scorer(1f,
searcher.collectionStatistics("field"),
searcher.termStatistics(term, context));
sims[(int)ord] = new LeafSimScorer(w, reader, true, 1);
sims[(int)ord] = new LeafSimScorer(w, reader, "field", true);
}
}
}

View File

@ -111,7 +111,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
@ -131,7 +131,7 @@ public class TestSimilarityProvider extends LuceneTestCase {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
return 10;

View File

@ -231,7 +231,7 @@ public class TestSubScorerFreqs extends LuceneTestCase {
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimScorer(collectionStats.field()) {
return new SimScorer() {
@Override
public float score(float freq, long norm) {
return freq;

View File

@ -71,7 +71,7 @@ public class NormValueSource extends ValueSource {
final SimScorer simScorer = similarity.scorer(1f,
new CollectionStatistics(field, 1, 1, 1, 1),
new TermStatistics(new BytesRef("bogus"), 1, 1));
final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, readerContext.reader(), true, Float.MAX_VALUE);
final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, readerContext.reader(), field, true);
return new FloatDocValues(this) {
int lastDocID = -1;

View File

@ -397,7 +397,7 @@ public class TermAutomatonQuery extends Query {
}
if (any) {
return new TermAutomatonScorer(this, enums, anyTermID, idToTerm, new LeafSimScorer(stats, context.reader(), true, Float.MAX_VALUE));
return new TermAutomatonScorer(this, enums, anyTermID, idToTerm, new LeafSimScorer(stats, context.reader(), field, true));
} else {
return null;
}

View File

@ -361,7 +361,7 @@ class TermAutomatonScorer extends Scorer {
@Override
public float getMaxScore(int upTo) throws IOException {
return docScorer.maxScore();
return docScorer.getSimScorer().score(Float.MAX_VALUE, 1L);
}
static class TermRunAutomaton extends RunAutomaton {

View File

@ -142,7 +142,7 @@ public final class IntervalQuery extends Query {
if (intervals == null)
return null;
LeafSimScorer leafScorer = simScorer == null ? null
: new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), Float.MAX_VALUE);
: new LeafSimScorer(simScorer, context.reader(), field, scoreMode.needsScores());
return new IntervalScorer(this, intervals, leafScorer);
}

View File

@ -65,7 +65,7 @@ public class AssertingSimilarity extends Similarity {
final float boost;
AssertingSimScorer(SimScorer delegate, float boost) {
super(delegate.getField());
super();
this.delegate = delegate;
this.boost = boost;
}