Remove LeafSimScorer abstraction. (#13957)

`LeafSimScorer` is a specialization of a `SimScorer` for a given segment. It
doesn't add much value, but benchmarks suggest that it adds measurable overhead
to queries sorted by score.
This commit is contained in:
Adrien Grand 2024-10-26 13:45:54 +02:00 committed by GitHub
parent b12ee52999
commit a3a00f3f58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 204 additions and 205 deletions

View File

@ -9,6 +9,9 @@ API Changes
---------------------
* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)
* GITHUB#13957: Removed LeafSimScorer class, to save its overhead. Scorers now
compute scores directly from a SimScorer, postings and norms. (Adrien Grand)
New Features
---------------------
(No changes)

View File

@ -27,7 +27,6 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
@ -120,7 +119,6 @@ final class FeatureQuery extends Query {
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight thisWeight = this;
Terms terms = Terms.getTerms(context.reader(), fieldName);
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
@ -135,10 +133,8 @@ final class FeatureQuery extends Query {
@Override
public Scorer get(long leadCost) throws IOException {
final SimScorer scorer = function.scorer(boost);
final LeafSimScorer simScorer =
new LeafSimScorer(scorer, context.reader(), fieldName, false);
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
return new TermScorer(thisWeight, impacts, simScorer, topLevelScoringClause);
return new TermScorer(impacts, scorer, null, topLevelScoringClause);
}
@Override

View File

@ -1,72 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/**
 * Specialization of a {@link SimScorer} bound to one {@link LeafReader} (segment): it resolves the
 * per-document norm values of a field so that callers can score by (doc, freq) alone.
 */
public final class LeafSimScorer {

  private final SimScorer scorer;
  private final NumericDocValues norms; // null when scores are not needed or no norms were indexed

  /** Sole constructor: Score documents of {@code reader} with {@code scorer}. */
  public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores)
      throws IOException {
    this.scorer = Objects.requireNonNull(scorer);
    this.norms = needsScores ? reader.getNormValues(field) : null;
  }

  /** Return the wrapped {@link SimScorer}. */
  public SimScorer getSimScorer() {
    return scorer;
  }

  /** Resolve the norm for {@code doc}, or the default norm (1) when norms are absent. */
  private long getNormValue(int doc) throws IOException {
    if (norms == null) {
      return 1L; // default norm when none were requested or indexed
    }
    final boolean advanced = norms.advanceExact(doc);
    assert advanced;
    return norms.longValue();
  }

  /**
   * Score the provided document assuming the given term document frequency. This method must be
   * called on non-decreasing sequences of doc ids.
   *
   * @see SimScorer#score(float, long)
   */
  public float score(int doc, float freq) throws IOException {
    final long norm = getNormValue(doc);
    return scorer.score(freq, norm);
  }

  /**
   * Explain the score for the provided document assuming the given term document frequency. This
   * method must be called on non-decreasing sequences of doc ids.
   *
   * @see SimScorer#explain(Explanation, long)
   */
  public Explanation explain(int doc, Explanation freqExpl) throws IOException {
    final long norm = getNormValue(doc);
    return scorer.explain(freqExpl, norm);
  }
}

View File

@ -18,6 +18,8 @@
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
class PhraseScorer extends Scorer {
@ -26,16 +28,19 @@ class PhraseScorer extends Scorer {
final MaxScoreCache maxScoreCache;
final PhraseMatcher matcher;
final ScoreMode scoreMode;
private final LeafSimScorer simScorer;
private final SimScorer simScorer;
private final NumericDocValues norms;
final float matchCost;
private float minCompetitiveScore = 0;
private float freq = 0;
PhraseScorer(PhraseMatcher matcher, ScoreMode scoreMode, LeafSimScorer simScorer) {
PhraseScorer(
PhraseMatcher matcher, ScoreMode scoreMode, SimScorer simScorer, NumericDocValues norms) {
this.matcher = matcher;
this.scoreMode = scoreMode;
this.simScorer = simScorer;
this.norms = norms;
this.matchCost = matcher.getMatchCost();
this.approximation = matcher.approximation();
this.impactsApproximation = matcher.impactsApproximation();
@ -50,7 +55,11 @@ class PhraseScorer extends Scorer {
matcher.reset();
if (scoreMode == ScoreMode.TOP_SCORES && minCompetitiveScore > 0) {
float maxFreq = matcher.maxFreq();
if (simScorer.score(docID(), maxFreq) < minCompetitiveScore) {
long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
if (simScorer.score(maxFreq, norm) < minCompetitiveScore) {
// The maximum score we could get is less than the min competitive score
return false;
}
@ -79,7 +88,11 @@ class PhraseScorer extends Scorer {
freq += matcher.sloppyWeight();
}
}
return simScorer.score(docID(), freq);
long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
return simScorer.score(freq, norm);
}
@Override

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
@ -63,9 +64,8 @@ public abstract class PhraseWeight extends Weight {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
if (matcher == null) return null;
LeafSimScorer simScorer =
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
final var scorer = new PhraseScorer(matcher, scoreMode, simScorer);
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
final var scorer = new PhraseScorer(matcher, scoreMode, stats, norms);
return new DefaultScorerSupplier(scorer);
}
@ -83,10 +83,13 @@ public abstract class PhraseWeight extends Weight {
while (matcher.nextMatch()) {
freq += matcher.sloppyWeight();
}
LeafSimScorer docScorer =
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = stats.explain(freqExplanation, norm);
return Explanation.match(
scoreExplanation.getValue(),
"weight("

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Term;
@ -38,6 +39,7 @@ import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOSupplier;
import org.apache.lucene.util.PriorityQueue;
@ -259,9 +261,13 @@ public final class SynonymQuery extends Query {
assert scorer instanceof TermScorer;
freq = ((TermScorer) scorer).freq();
}
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = simWeight.explain(freqExplanation, norm);
return Explanation.match(
scoreExplanation.getValue(),
"weight("
@ -334,27 +340,27 @@ public final class SynonymQuery extends Query {
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
}
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
NumericDocValues norms = context.reader().getNormValues(field);
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
if (iterators.size() == 1) {
final TermScorer scorer;
if (scoreMode == ScoreMode.TOP_SCORES) {
scorer = new TermScorer(impacts.get(0), simScorer);
scorer = new TermScorer(impacts.get(0), simWeight, norms);
} else {
scorer = new TermScorer(iterators.get(0), simScorer);
scorer = new TermScorer(iterators.get(0), simWeight, norms);
}
float boost = termBoosts.get(0);
return scoreMode == ScoreMode.COMPLETE_NO_SCORES || boost == 1f
? scorer
: new FreqBoostTermScorer(boost, scorer, simScorer);
: new FreqBoostTermScorer(boost, scorer, simWeight, norms);
} else {
// we use termscorers + disjunction as an impl detail
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
for (int i = 0; i < iterators.size(); i++) {
PostingsEnum postings = iterators.get(i);
final TermScorer termScorer = new TermScorer(postings, simScorer);
final TermScorer termScorer = new TermScorer(postings, simWeight, norms);
float boost = termBoosts.get(i);
final DisiWrapperFreq wrapper = new DisiWrapperFreq(termScorer, boost);
queue.add(wrapper);
@ -368,8 +374,7 @@ public final class SynonymQuery extends Query {
boosts[i] = termBoosts.get(i);
}
ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]), boosts);
MaxScoreCache maxScoreCache =
new MaxScoreCache(impactsSource, simScorer.getSimScorer());
MaxScoreCache maxScoreCache = new MaxScoreCache(impactsSource, simWeight);
ImpactsDISI impactsDisi = new ImpactsDISI(iterator, maxScoreCache);
if (scoreMode == ScoreMode.TOP_SCORES) {
@ -379,7 +384,7 @@ public final class SynonymQuery extends Query {
iterator = impactsDisi;
}
return new SynonymScorer(queue, iterator, impactsDisi, simScorer);
return new SynonymScorer(queue, iterator, impactsDisi, simWeight, norms);
}
}
@ -575,18 +580,21 @@ public final class SynonymQuery extends Query {
private final DocIdSetIterator iterator;
private final MaxScoreCache maxScoreCache;
private final ImpactsDISI impactsDisi;
private final LeafSimScorer simScorer;
private final SimScorer scorer;
private final NumericDocValues norms;
SynonymScorer(
DisiPriorityQueue queue,
DocIdSetIterator iterator,
ImpactsDISI impactsDisi,
LeafSimScorer simScorer) {
SimScorer scorer,
NumericDocValues norms) {
this.queue = queue;
this.iterator = iterator;
this.maxScoreCache = impactsDisi.getMaxScoreCache();
this.impactsDisi = impactsDisi;
this.simScorer = simScorer;
this.scorer = scorer;
this.norms = norms;
}
@Override
@ -605,7 +613,11 @@ public final class SynonymQuery extends Query {
@Override
public float score() throws IOException {
return simScorer.score(iterator.docID(), freq());
long norm = 1L;
if (norms != null && norms.advanceExact(iterator.docID())) {
norm = norms.longValue();
}
return scorer.score(freq(), norm);
}
@Override
@ -647,9 +659,11 @@ public final class SynonymQuery extends Query {
private static class FreqBoostTermScorer extends FilterScorer {
final float boost;
final TermScorer in;
final LeafSimScorer docScorer;
final SimScorer scorer;
final NumericDocValues norms;
public FreqBoostTermScorer(float boost, TermScorer in, LeafSimScorer docScorer) {
public FreqBoostTermScorer(
float boost, TermScorer in, SimScorer scorer, NumericDocValues norms) {
super(in);
if (Float.isNaN(boost) || Float.compare(boost, 0f) < 0 || Float.compare(boost, 1f) > 0) {
throw new IllegalArgumentException(
@ -657,7 +671,8 @@ public final class SynonymQuery extends Query {
}
this.boost = boost;
this.in = in;
this.docScorer = docScorer;
this.scorer = scorer;
this.norms = norms;
}
float freq() throws IOException {
@ -666,8 +681,11 @@ public final class SynonymQuery extends Query {
@Override
public float score() throws IOException {
assert docID() != DocIdSetIterator.NO_MORE_DOCS;
return docScorer.score(in.docID(), freq());
long norm = 1L;
if (norms != null && norms.advanceExact(in.docID())) {
norm = norms.longValue();
}
return scorer.score(freq(), norm);
}
@Override

View File

@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
@ -150,19 +151,17 @@ public class TermQuery extends Query {
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
}
LeafSimScorer scorer =
new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
NumericDocValues norms = null;
if (scoreMode.needsScores()) {
norms = context.reader().getNormValues(term.field());
}
if (scoreMode == ScoreMode.TOP_SCORES) {
return new TermScorer(
TermWeight.this,
termsEnum.impacts(PostingsEnum.FREQS),
scorer,
topLevelScoringClause);
termsEnum.impacts(PostingsEnum.FREQS), simScorer, norms, topLevelScoringClause);
} else {
return new TermScorer(
termsEnum.postings(
null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE),
scorer);
int flags = scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE;
return new TermScorer(termsEnum.postings(null, flags), simScorer, norms);
}
}
@ -223,11 +222,14 @@ public class TermQuery extends Query {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float freq = ((TermScorer) scorer).freq();
LeafSimScorer docScorer =
new LeafSimScorer(simScorer, context.reader(), term.field(), true);
NumericDocValues norms = context.reader().getNormValues(term.field());
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation freqExplanation =
Explanation.match(freq, "freq, occurrences of term within document");
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
return Explanation.match(
scoreExplanation.getValue(),
"weight("

View File

@ -18,8 +18,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/**
* Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
@ -29,17 +31,19 @@ import org.apache.lucene.index.SlowImpactsEnum;
public final class TermScorer extends Scorer {
private final PostingsEnum postingsEnum;
private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer;
private final SimScorer scorer;
private final NumericDocValues norms;
private final ImpactsDISI impactsDisi;
private final MaxScoreCache maxScoreCache;
/** Construct a {@link TermScorer} that will iterate all documents. */
public TermScorer(PostingsEnum postingsEnum, LeafSimScorer docScorer) {
public TermScorer(PostingsEnum postingsEnum, SimScorer scorer, NumericDocValues norms) {
iterator = this.postingsEnum = postingsEnum;
ImpactsEnum impactsEnum = new SlowImpactsEnum(postingsEnum);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
impactsDisi = null;
this.docScorer = docScorer;
this.scorer = scorer;
this.norms = norms;
}
/**
@ -47,12 +51,12 @@ public final class TermScorer extends Scorer {
* documents.
*/
public TermScorer(
Weight weight,
ImpactsEnum impactsEnum,
LeafSimScorer docScorer,
SimScorer scorer,
NumericDocValues norms,
boolean topLevelScoringClause) {
postingsEnum = impactsEnum;
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
if (topLevelScoringClause) {
impactsDisi = new ImpactsDISI(impactsEnum, maxScoreCache);
iterator = impactsDisi;
@ -60,7 +64,8 @@ public final class TermScorer extends Scorer {
impactsDisi = null;
iterator = impactsEnum;
}
this.docScorer = docScorer;
this.scorer = scorer;
this.norms = norms;
}
@Override
@ -80,13 +85,23 @@ public final class TermScorer extends Scorer {
@Override
public float score() throws IOException {
assert docID() != DocIdSetIterator.NO_MORE_DOCS;
return docScorer.score(postingsEnum.docID(), postingsEnum.freq());
var postingsEnum = this.postingsEnum;
var norms = this.norms;
long norm = 1L;
if (norms != null && norms.advanceExact(postingsEnum.docID())) {
norm = norms.longValue();
}
return scorer.score(postingsEnum.freq(), norm);
}
@Override
public float smoothingScore(int docId) throws IOException {
return docScorer.score(docId, 0);
long norm = 1L;
if (norms != null && norms.advanceExact(docId)) {
norm = norms.longValue();
}
return scorer.score(0, norm);
}
@Override

View File

@ -29,6 +29,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
@ -345,7 +346,8 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
final int maxDoc;
final Set<Long> ords = new HashSet<>();
final LeafSimScorer[] sims;
final SimScorer[] sims;
final NumericDocValues norms;
final int minNrShouldMatch;
double score = Float.NaN;
@ -356,7 +358,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
this.maxDoc = reader.maxDoc();
BooleanQuery bq = (BooleanQuery) weight.getQuery();
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
this.sims = new LeafSimScorer[(int) dv.getValueCount()];
this.sims = new SimScorer[(int) dv.getValueCount()];
for (BooleanClause clause : bq.clauses()) {
assert !clause.isProhibited();
assert !clause.isRequired();
@ -366,14 +368,14 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
boolean success = ords.add(ord);
assert success; // no dups
TermStates ts = TermStates.build(searcher, term, true);
SimScorer w =
sims[(int) ord] =
weight.similarity.scorer(
1f,
searcher.collectionStatistics("field"),
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
sims[(int) ord] = new LeafSimScorer(w, reader, "field", true);
}
}
norms = reader.getNormValues("field");
}
@Override
@ -409,11 +411,15 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
continue;
}
long ord;
long norm = 1L;
if (norms != null && norms.advanceExact(currentDoc)) {
norm = norms.longValue();
}
for (int i = 0; i < dv.docValueCount(); i++) {
ord = dv.nextOrd();
if (ords.contains(ord)) {
currentMatched++;
score += sims[(int) ord].score(currentDoc, 1);
score += sims[(int) ord].score(1, norm);
}
}
if (currentMatched >= minNrShouldMatch) {

View File

@ -19,12 +19,12 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
@ -76,8 +76,7 @@ public class NormValueSource extends ValueSource {
1f,
new CollectionStatistics(field, 1, 1, 1, 1),
new TermStatistics(new BytesRef("bogus"), 1, 1));
final LeafSimScorer leafSimScorer =
new LeafSimScorer(simScorer, readerContext.reader(), field, true);
final NumericDocValues norms = readerContext.reader().getNormValues(field);
return new FloatDocValues(this) {
int lastDocID = -1;
@ -88,7 +87,11 @@ public class NormValueSource extends ValueSource {
throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
}
lastDocID = docID;
return leafSimScorer.score(docID, 1f);
long norm = 1L;
if (norms != null && norms.advanceExact(docID)) {
norm = norms.longValue();
}
return simScorer.score(1f, norm);
}
};
}

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
@ -32,11 +33,11 @@ import org.apache.lucene.queries.spans.Spans;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
/**
@ -190,9 +191,9 @@ public class PayloadScoreQuery extends SpanQuery {
if (spans == null) {
return null;
}
LeafSimScorer docScorer = innerWeight.getSimScorer(context);
NumericDocValues norms = context.reader().getNormValues(field);
PayloadSpans payloadSpans = new PayloadSpans(spans, decoder);
final var scorer = new PayloadSpanScorer(payloadSpans, docScorer);
final var scorer = new PayloadSpanScorer(payloadSpans, innerWeight.getSimScorer(), norms);
return new DefaultScorerSupplier(scorer);
}
}
@ -248,8 +249,9 @@ public class PayloadScoreQuery extends SpanQuery {
private final PayloadSpans spans;
private PayloadSpanScorer(PayloadSpans spans, LeafSimScorer docScorer) throws IOException {
super(spans, docScorer);
private PayloadSpanScorer(PayloadSpans spans, SimScorer scorer, NumericDocValues norms)
throws IOException {
super(spans, scorer, norms);
this.spans = spans;
}

View File

@ -21,6 +21,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
@ -34,7 +35,6 @@ import org.apache.lucene.queries.spans.SpanWeight;
import org.apache.lucene.queries.spans.Spans;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
@ -191,8 +191,8 @@ public class SpanPayloadCheckQuery extends SpanQuery {
if (spans == null) {
return null;
}
final LeafSimScorer docScorer = getSimScorer(context);
final var scorer = new SpanScorer(spans, docScorer);
final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, getSimScorer(), norms);
return new DefaultScorerSupplier(scorer);
}

View File

@ -20,10 +20,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScorerSupplier;
@ -144,8 +144,8 @@ public final class SpanContainingQuery extends SpanContainQuery {
if (spans == null) {
return null;
}
final LeafSimScorer docScorer = getSimScorer(context);
final var scorer = new SpanScorer(spans, docScorer);
final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, getSimScorer(), norms);
return new DefaultScorerSupplier(scorer);
}
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
@ -247,8 +246,8 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
if (spans == null) {
return null;
}
final LeafSimScorer docScorer = getSimScorer(context);
final var scorer = new SpanScorer(spans, docScorer);
final var scorer =
new SpanScorer(spans, getSimScorer(), context.reader().getNormValues(field));
return new DefaultScorerSupplier(scorer);
}
}

View File

@ -18,10 +18,11 @@ package org.apache.lucene.queries.spans;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/**
* A basic {@link Scorer} over {@link Spans}.
@ -31,7 +32,8 @@ import org.apache.lucene.search.TwoPhaseIterator;
public class SpanScorer extends Scorer {
protected final Spans spans;
protected final LeafSimScorer docScorer;
protected final SimScorer scorer;
protected final NumericDocValues norms;
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
private float freq;
@ -39,9 +41,10 @@ public class SpanScorer extends Scorer {
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
/** Sole constructor. */
public SpanScorer(Spans spans, LeafSimScorer docScorer) {
public SpanScorer(Spans spans, SimScorer scorer, NumericDocValues norms) {
this.spans = Objects.requireNonNull(spans);
this.docScorer = docScorer;
this.scorer = scorer;
this.norms = norms;
}
/** return the Spans for this Scorer * */
@ -69,8 +72,12 @@ public class SpanScorer extends Scorer {
* slop-adjusted {@link #freq}.
*/
protected float scoreCurrentDoc() throws IOException {
assert docScorer != null : getClass() + " has a null docScorer!";
return docScorer.score(docID(), freq);
assert scorer != null : getClass() + " has a null docScorer!";
long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
return scorer.score(freq, norm);
}
/**
@ -98,7 +105,7 @@ public class SpanScorer extends Scorer {
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased
// endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos;
if (docScorer == null) { // scores not required, break out here
if (scorer == null) { // scores not required, break out here
freq = 1;
return;
}

View File

@ -160,7 +160,7 @@ public class SpanTermQuery extends SpanQuery {
final PostingsEnum postings =
termsEnum.postings(null, requiredPostings.getRequiredPostings());
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
return new TermSpans(getSimScorer(context), postings, term, positionsCost);
return new TermSpans(postings, term, positionsCost);
}
}

View File

@ -22,13 +22,13 @@ import java.util.Comparator;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
@ -38,6 +38,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
/** Expert-only. Public for use by other weight implementations */
@ -142,8 +143,8 @@ public abstract class SpanWeight extends Weight {
if (spans == null) {
return null;
}
final LeafSimScorer docScorer = getSimScorer(context);
final var scorer = new SpanScorer(spans, docScorer);
final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, simScorer, norms);
return new ScorerSupplier() {
@Override
public SpanScorer get(long leadCost) throws IOException {
@ -157,15 +158,9 @@ public abstract class SpanWeight extends Weight {
};
}
/**
* Return a LeafSimScorer for this context
*
* @param context the LeafReaderContext
* @return a SimWeight
* @throws IOException on error
*/
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
/** Return the SimScorer */
public SimScorer getSimScorer() {
return simScorer;
}
@Override
@ -176,9 +171,13 @@ public abstract class SpanWeight extends Weight {
if (newDoc == doc) {
if (simScorer != null) {
float freq = scorer.sloppyFreq();
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
return Explanation.match(
scoreExplanation.getValue(),
"weight("

View File

@ -21,7 +21,6 @@ import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
/**
* Expert: Public for extension only. This does not work correctly for terms that indexed at
@ -37,7 +36,7 @@ public class TermSpans extends Spans {
protected boolean readPayload;
private final float positionsCost;
public TermSpans(LeafSimScorer scorer, PostingsEnum postings, Term term, float positionsCost) {
public TermSpans(PostingsEnum postings, Term term, float positionsCost) {
this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term);
this.doc = -1;

View File

@ -23,8 +23,8 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/** Wraps a SpanWeight with additional asserts */
public class AssertingSpanWeight extends SpanWeight {
@ -55,8 +55,8 @@ public class AssertingSpanWeight extends SpanWeight {
}
@Override
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
return in.getSimScorer(context);
public SimScorer getSimScorer() {
return in.getSimScorer();
}
@Override

View File

@ -45,7 +45,6 @@ import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
@ -402,14 +401,12 @@ public final class CombinedFieldQuery extends Query implements Accountable {
MultiNormsLeafSimScorer scoringSimScorer =
new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
LeafSimScorer nonScoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
// we use termscorers + disjunction as an impl detail
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
for (int i = 0; i < iterators.size(); i++) {
float weight = fields.get(i).weight;
queue.add(
new WeightedDisiWrapper(new TermScorer(iterators.get(i), nonScoringSimScorer), weight));
new WeightedDisiWrapper(new TermScorer(iterators.get(i), simWeight, null), weight));
}
// Even though it is called approximation, it is accurate since none of
// the sub iterators are two-phase iterators.

View File

@ -16,8 +16,6 @@
*/
package org.apache.lucene.sandbox.search;
import static org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@ -27,13 +25,13 @@ import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.SmallFloat;
/**
* Copy of {@link LeafSimScorer} that sums document's norms from multiple fields.
* Scorer that sums document's norms from multiple fields.
*
* <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
* requires that either all fields or no fields have norms enabled. Having only some fields with

View File

@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
@ -35,7 +36,6 @@ import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@ -429,9 +429,8 @@ public class TermAutomatonQuery extends Query implements Accountable {
}
if (any) {
scorer =
new TermAutomatonScorer(
this, enums, anyTermID, new LeafSimScorer(stats, context.reader(), field, true));
NumericDocValues norms = context.reader().getNormValues(field);
scorer = new TermAutomatonScorer(this, enums, anyTermID, stats, norms);
} else {
return null;
}
@ -456,15 +455,20 @@ public class TermAutomatonQuery extends Query implements Accountable {
}
float score = scorer.score();
LeafSimScorer leafSimScorer = ((TermAutomatonScorer) scorer).getLeafSimScorer();
EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc();
NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
List<Explanation> termExplanations = new ArrayList<>();
for (EnumAndScorer enumAndScorer : originalSubsOnDoc) {
if (enumAndScorer != null) {
PostingsEnum postingsEnum = enumAndScorer.posEnum;
if (postingsEnum.docID() == doc) {
float termScore = leafSimScorer.score(doc, postingsEnum.freq());
float termScore = stats.score(postingsEnum.freq(), norm);
termExplanations.add(
Explanation.match(
postingsEnum.freq(),
@ -482,7 +486,7 @@ public class TermAutomatonQuery extends Query implements Accountable {
Explanation freqExplanation =
Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
return leafSimScorer.explain(doc, freqExplanation);
return stats.explain(freqExplanation, norm);
}
}

View File

@ -17,11 +17,12 @@
package org.apache.lucene.sandbox.search;
import java.io.IOException;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.sandbox.search.TermAutomatonQuery.EnumAndScorer;
import org.apache.lucene.sandbox.search.TermAutomatonQuery.TermAutomatonWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
@ -44,7 +45,8 @@ class TermAutomatonScorer extends Scorer {
// This is -1 if wildcard (null) terms were not used, else it's the id
// of the wildcard term:
private final int anyTermID;
private final LeafSimScorer docScorer;
private final SimScorer scorer;
private final NumericDocValues norms;
private int numSubsOnDoc;
@ -61,11 +63,16 @@ class TermAutomatonScorer extends Scorer {
private final EnumAndScorer[] originalSubsOnDoc;
public TermAutomatonScorer(
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
TermAutomatonWeight weight,
EnumAndScorer[] subs,
int anyTermID,
SimScorer scorer,
NumericDocValues norms)
throws IOException {
// System.out.println(" automaton:\n" + weight.automaton.toDot());
this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length);
this.docScorer = docScorer;
this.scorer = scorer;
this.norms = norms;
this.docIDQueue = new DocIDQueue(subs.length);
this.posQueue = new PositionQueue(subs.length);
this.anyTermID = anyTermID;
@ -356,10 +363,6 @@ class TermAutomatonScorer extends Scorer {
return originalSubsOnDoc;
}
LeafSimScorer getLeafSimScorer() {
return docScorer;
}
@Override
public int docID() {
return docID;
@ -369,12 +372,16 @@ class TermAutomatonScorer extends Scorer {
public float score() throws IOException {
// TODO: we could probably do better here, e.g. look @ freqs of actual terms involved in this
// doc and score differently
return docScorer.score(docID, freq);
long norm = 1L;
if (norms != null && norms.advanceExact(docID)) {
norm = norms.longValue();
}
return scorer.score(freq, norm);
}
@Override
public float getMaxScore(int upTo) throws IOException {
return docScorer.getSimScorer().score(Float.MAX_VALUE, 1L);
return scorer.score(Float.MAX_VALUE, 1L);
}
static class TermRunAutomaton extends RunAutomaton {