Remove LeafSimScorer abstraction. (#13957)

`LeafSimScorer` is a specialization of a `SimScorer` for a given segment. It
doesn't add much value, but benchmarks suggest that it adds measurable overhead
to queries sorted by score.
This commit is contained in:
Adrien Grand 2024-10-26 13:45:54 +02:00 committed by GitHub
parent b12ee52999
commit a3a00f3f58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 204 additions and 205 deletions

View File

@ -9,6 +9,9 @@ API Changes
--------------------- ---------------------
* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski) * GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)
* GITHUB#13957: Removed LeafSimScorer class, to save its overhead. Scorers now
compute scores directly from a SimScorer, postings and norms. (Adrien Grand)
New Features New Features
--------------------- ---------------------
(No changes) (No changes)

View File

@ -27,7 +27,6 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -120,7 +119,6 @@ final class FeatureQuery extends Query {
@Override @Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight thisWeight = this;
Terms terms = Terms.getTerms(context.reader(), fieldName); Terms terms = Terms.getTerms(context.reader(), fieldName);
TermsEnum termsEnum = terms.iterator(); TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(new BytesRef(featureName)) == false) { if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
@ -135,10 +133,8 @@ final class FeatureQuery extends Query {
@Override @Override
public Scorer get(long leadCost) throws IOException { public Scorer get(long leadCost) throws IOException {
final SimScorer scorer = function.scorer(boost); final SimScorer scorer = function.scorer(boost);
final LeafSimScorer simScorer =
new LeafSimScorer(scorer, context.reader(), fieldName, false);
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS); final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
return new TermScorer(thisWeight, impacts, simScorer, topLevelScoringClause); return new TermScorer(impacts, scorer, null, topLevelScoringClause);
} }
@Override @Override

View File

@ -1,72 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/**
 * Pairs a {@link SimScorer} with the norms of one field of a single {@link LeafReader}, so that
 * callers can score or explain a document by doc id and term frequency alone.
 */
public final class LeafSimScorer {

  /** The wrapped scorer; never {@code null}. */
  private final SimScorer simScorer;

  /** Norms of the scored field, or {@code null} when the default norm is used for every doc. */
  private final NumericDocValues normValues;

  /**
   * Sole constructor: score documents of {@code reader} with {@code scorer}.
   *
   * @param scorer the scorer to wrap, must not be {@code null}
   * @param reader the segment reader the norms are read from
   * @param field the field whose norms are read
   * @param needsScores when {@code false}, norms are not read from {@code reader} and the default
   *     norm is used instead
   * @throws IOException if reading the norms from {@code reader} fails
   */
  public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores)
      throws IOException {
    this.simScorer = Objects.requireNonNull(scorer);
    if (needsScores) {
      this.normValues = reader.getNormValues(field);
    } else {
      this.normValues = null;
    }
  }

  /** Return the wrapped {@link SimScorer}. */
  public SimScorer getSimScorer() {
    return simScorer;
  }

  /** Returns the norm of {@code doc}, or {@code 1} when no norms are available. */
  private long getNormValue(int doc) throws IOException {
    if (normValues == null) {
      return 1L; // default norm
    }
    boolean positioned = normValues.advanceExact(doc);
    assert positioned;
    return normValues.longValue();
  }

  /**
   * Score the provided document assuming the given term document frequency. This method must be
   * called on non-decreasing sequences of doc ids.
   *
   * @see SimScorer#score(float, long)
   */
  public float score(int doc, float freq) throws IOException {
    long norm = getNormValue(doc);
    return simScorer.score(freq, norm);
  }

  /**
   * Explain the score for the provided document assuming the given term document frequency. This
   * method must be called on non-decreasing sequences of doc ids.
   *
   * @see SimScorer#explain(Explanation, long)
   */
  public Explanation explain(int doc, Explanation freqExpl) throws IOException {
    long norm = getNormValue(doc);
    return simScorer.explain(freqExpl, norm);
  }
}

View File

@ -18,6 +18,8 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
class PhraseScorer extends Scorer { class PhraseScorer extends Scorer {
@ -26,16 +28,19 @@ class PhraseScorer extends Scorer {
final MaxScoreCache maxScoreCache; final MaxScoreCache maxScoreCache;
final PhraseMatcher matcher; final PhraseMatcher matcher;
final ScoreMode scoreMode; final ScoreMode scoreMode;
private final LeafSimScorer simScorer; private final SimScorer simScorer;
private final NumericDocValues norms;
final float matchCost; final float matchCost;
private float minCompetitiveScore = 0; private float minCompetitiveScore = 0;
private float freq = 0; private float freq = 0;
PhraseScorer(PhraseMatcher matcher, ScoreMode scoreMode, LeafSimScorer simScorer) { PhraseScorer(
PhraseMatcher matcher, ScoreMode scoreMode, SimScorer simScorer, NumericDocValues norms) {
this.matcher = matcher; this.matcher = matcher;
this.scoreMode = scoreMode; this.scoreMode = scoreMode;
this.simScorer = simScorer; this.simScorer = simScorer;
this.norms = norms;
this.matchCost = matcher.getMatchCost(); this.matchCost = matcher.getMatchCost();
this.approximation = matcher.approximation(); this.approximation = matcher.approximation();
this.impactsApproximation = matcher.impactsApproximation(); this.impactsApproximation = matcher.impactsApproximation();
@ -50,7 +55,11 @@ class PhraseScorer extends Scorer {
matcher.reset(); matcher.reset();
if (scoreMode == ScoreMode.TOP_SCORES && minCompetitiveScore > 0) { if (scoreMode == ScoreMode.TOP_SCORES && minCompetitiveScore > 0) {
float maxFreq = matcher.maxFreq(); float maxFreq = matcher.maxFreq();
if (simScorer.score(docID(), maxFreq) < minCompetitiveScore) { long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
if (simScorer.score(maxFreq, norm) < minCompetitiveScore) {
// The maximum score we could get is less than the min competitive score // The maximum score we could get is less than the min competitive score
return false; return false;
} }
@ -79,7 +88,11 @@ class PhraseScorer extends Scorer {
freq += matcher.sloppyWeight(); freq += matcher.sloppyWeight();
} }
} }
return simScorer.score(docID(), freq); long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
return simScorer.score(freq, norm);
} }
@Override @Override

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity.SimScorer;
@ -63,9 +64,8 @@ public abstract class PhraseWeight extends Weight {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
PhraseMatcher matcher = getPhraseMatcher(context, stats, false); PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
if (matcher == null) return null; if (matcher == null) return null;
LeafSimScorer simScorer = NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores()); final var scorer = new PhraseScorer(matcher, scoreMode, stats, norms);
final var scorer = new PhraseScorer(matcher, scoreMode, simScorer);
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }
@ -83,10 +83,13 @@ public abstract class PhraseWeight extends Weight {
while (matcher.nextMatch()) { while (matcher.nextMatch()) {
freq += matcher.sloppyWeight(); freq += matcher.sloppyWeight();
} }
LeafSimScorer docScorer =
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = stats.explain(freqExplanation, norm);
return Explanation.match( return Explanation.match(
scoreExplanation.getValue(), scoreExplanation.getValue(),
"weight(" "weight("

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource; import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -38,6 +39,7 @@ import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.IOSupplier;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -259,9 +261,13 @@ public final class SynonymQuery extends Query {
assert scorer instanceof TermScorer; assert scorer instanceof TermScorer;
freq = ((TermScorer) scorer).freq(); freq = ((TermScorer) scorer).freq();
} }
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq); Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = simWeight.explain(freqExplanation, norm);
return Explanation.match( return Explanation.match(
scoreExplanation.getValue(), scoreExplanation.getValue(),
"weight(" "weight("
@ -334,27 +340,27 @@ public final class SynonymQuery extends Query {
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty()); return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
} }
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), field, true); NumericDocValues norms = context.reader().getNormValues(field);
// we must optimize this case (term not in segment), disjunctions require >= 2 subs // we must optimize this case (term not in segment), disjunctions require >= 2 subs
if (iterators.size() == 1) { if (iterators.size() == 1) {
final TermScorer scorer; final TermScorer scorer;
if (scoreMode == ScoreMode.TOP_SCORES) { if (scoreMode == ScoreMode.TOP_SCORES) {
scorer = new TermScorer(impacts.get(0), simScorer); scorer = new TermScorer(impacts.get(0), simWeight, norms);
} else { } else {
scorer = new TermScorer(iterators.get(0), simScorer); scorer = new TermScorer(iterators.get(0), simWeight, norms);
} }
float boost = termBoosts.get(0); float boost = termBoosts.get(0);
return scoreMode == ScoreMode.COMPLETE_NO_SCORES || boost == 1f return scoreMode == ScoreMode.COMPLETE_NO_SCORES || boost == 1f
? scorer ? scorer
: new FreqBoostTermScorer(boost, scorer, simScorer); : new FreqBoostTermScorer(boost, scorer, simWeight, norms);
} else { } else {
// we use termscorers + disjunction as an impl detail // we use termscorers + disjunction as an impl detail
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size()); DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
for (int i = 0; i < iterators.size(); i++) { for (int i = 0; i < iterators.size(); i++) {
PostingsEnum postings = iterators.get(i); PostingsEnum postings = iterators.get(i);
final TermScorer termScorer = new TermScorer(postings, simScorer); final TermScorer termScorer = new TermScorer(postings, simWeight, norms);
float boost = termBoosts.get(i); float boost = termBoosts.get(i);
final DisiWrapperFreq wrapper = new DisiWrapperFreq(termScorer, boost); final DisiWrapperFreq wrapper = new DisiWrapperFreq(termScorer, boost);
queue.add(wrapper); queue.add(wrapper);
@ -368,8 +374,7 @@ public final class SynonymQuery extends Query {
boosts[i] = termBoosts.get(i); boosts[i] = termBoosts.get(i);
} }
ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]), boosts); ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]), boosts);
MaxScoreCache maxScoreCache = MaxScoreCache maxScoreCache = new MaxScoreCache(impactsSource, simWeight);
new MaxScoreCache(impactsSource, simScorer.getSimScorer());
ImpactsDISI impactsDisi = new ImpactsDISI(iterator, maxScoreCache); ImpactsDISI impactsDisi = new ImpactsDISI(iterator, maxScoreCache);
if (scoreMode == ScoreMode.TOP_SCORES) { if (scoreMode == ScoreMode.TOP_SCORES) {
@ -379,7 +384,7 @@ public final class SynonymQuery extends Query {
iterator = impactsDisi; iterator = impactsDisi;
} }
return new SynonymScorer(queue, iterator, impactsDisi, simScorer); return new SynonymScorer(queue, iterator, impactsDisi, simWeight, norms);
} }
} }
@ -575,18 +580,21 @@ public final class SynonymQuery extends Query {
private final DocIdSetIterator iterator; private final DocIdSetIterator iterator;
private final MaxScoreCache maxScoreCache; private final MaxScoreCache maxScoreCache;
private final ImpactsDISI impactsDisi; private final ImpactsDISI impactsDisi;
private final LeafSimScorer simScorer; private final SimScorer scorer;
private final NumericDocValues norms;
SynonymScorer( SynonymScorer(
DisiPriorityQueue queue, DisiPriorityQueue queue,
DocIdSetIterator iterator, DocIdSetIterator iterator,
ImpactsDISI impactsDisi, ImpactsDISI impactsDisi,
LeafSimScorer simScorer) { SimScorer scorer,
NumericDocValues norms) {
this.queue = queue; this.queue = queue;
this.iterator = iterator; this.iterator = iterator;
this.maxScoreCache = impactsDisi.getMaxScoreCache(); this.maxScoreCache = impactsDisi.getMaxScoreCache();
this.impactsDisi = impactsDisi; this.impactsDisi = impactsDisi;
this.simScorer = simScorer; this.scorer = scorer;
this.norms = norms;
} }
@Override @Override
@ -605,7 +613,11 @@ public final class SynonymQuery extends Query {
@Override @Override
public float score() throws IOException { public float score() throws IOException {
return simScorer.score(iterator.docID(), freq()); long norm = 1L;
if (norms != null && norms.advanceExact(iterator.docID())) {
norm = norms.longValue();
}
return scorer.score(freq(), norm);
} }
@Override @Override
@ -647,9 +659,11 @@ public final class SynonymQuery extends Query {
private static class FreqBoostTermScorer extends FilterScorer { private static class FreqBoostTermScorer extends FilterScorer {
final float boost; final float boost;
final TermScorer in; final TermScorer in;
final LeafSimScorer docScorer; final SimScorer scorer;
final NumericDocValues norms;
public FreqBoostTermScorer(float boost, TermScorer in, LeafSimScorer docScorer) { public FreqBoostTermScorer(
float boost, TermScorer in, SimScorer scorer, NumericDocValues norms) {
super(in); super(in);
if (Float.isNaN(boost) || Float.compare(boost, 0f) < 0 || Float.compare(boost, 1f) > 0) { if (Float.isNaN(boost) || Float.compare(boost, 0f) < 0 || Float.compare(boost, 1f) > 0) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -657,7 +671,8 @@ public final class SynonymQuery extends Query {
} }
this.boost = boost; this.boost = boost;
this.in = in; this.in = in;
this.docScorer = docScorer; this.scorer = scorer;
this.norms = norms;
} }
float freq() throws IOException { float freq() throws IOException {
@ -666,8 +681,11 @@ public final class SynonymQuery extends Query {
@Override @Override
public float score() throws IOException { public float score() throws IOException {
assert docID() != DocIdSetIterator.NO_MORE_DOCS; long norm = 1L;
return docScorer.score(in.docID(), freq()); if (norms != null && norms.advanceExact(in.docID())) {
norm = norms.longValue();
}
return scorer.score(freq(), norm);
} }
@Override @Override

View File

@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -150,19 +151,17 @@ public class TermQuery extends Query {
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty()); return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
} }
LeafSimScorer scorer = NumericDocValues norms = null;
new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores()); if (scoreMode.needsScores()) {
norms = context.reader().getNormValues(term.field());
}
if (scoreMode == ScoreMode.TOP_SCORES) { if (scoreMode == ScoreMode.TOP_SCORES) {
return new TermScorer( return new TermScorer(
TermWeight.this, termsEnum.impacts(PostingsEnum.FREQS), simScorer, norms, topLevelScoringClause);
termsEnum.impacts(PostingsEnum.FREQS),
scorer,
topLevelScoringClause);
} else { } else {
return new TermScorer( int flags = scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE;
termsEnum.postings( return new TermScorer(termsEnum.postings(null, flags), simScorer, norms);
null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE),
scorer);
} }
} }
@ -223,11 +222,14 @@ public class TermQuery extends Query {
int newDoc = scorer.iterator().advance(doc); int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) { if (newDoc == doc) {
float freq = ((TermScorer) scorer).freq(); float freq = ((TermScorer) scorer).freq();
LeafSimScorer docScorer = NumericDocValues norms = context.reader().getNormValues(term.field());
new LeafSimScorer(simScorer, context.reader(), term.field(), true); long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation freqExplanation = Explanation freqExplanation =
Explanation.match(freq, "freq, occurrences of term within document"); Explanation.match(freq, "freq, occurrences of term within document");
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
return Explanation.match( return Explanation.match(
scoreExplanation.getValue(), scoreExplanation.getValue(),
"weight(" "weight("

View File

@ -18,8 +18,10 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/** /**
* Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. * Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
@ -29,17 +31,19 @@ import org.apache.lucene.index.SlowImpactsEnum;
public final class TermScorer extends Scorer { public final class TermScorer extends Scorer {
private final PostingsEnum postingsEnum; private final PostingsEnum postingsEnum;
private final DocIdSetIterator iterator; private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer; private final SimScorer scorer;
private final NumericDocValues norms;
private final ImpactsDISI impactsDisi; private final ImpactsDISI impactsDisi;
private final MaxScoreCache maxScoreCache; private final MaxScoreCache maxScoreCache;
/** Construct a {@link TermScorer} that will iterate all documents. */ /** Construct a {@link TermScorer} that will iterate all documents. */
public TermScorer(PostingsEnum postingsEnum, LeafSimScorer docScorer) { public TermScorer(PostingsEnum postingsEnum, SimScorer scorer, NumericDocValues norms) {
iterator = this.postingsEnum = postingsEnum; iterator = this.postingsEnum = postingsEnum;
ImpactsEnum impactsEnum = new SlowImpactsEnum(postingsEnum); ImpactsEnum impactsEnum = new SlowImpactsEnum(postingsEnum);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer()); maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
impactsDisi = null; impactsDisi = null;
this.docScorer = docScorer; this.scorer = scorer;
this.norms = norms;
} }
/** /**
@ -47,12 +51,12 @@ public final class TermScorer extends Scorer {
* documents. * documents.
*/ */
public TermScorer( public TermScorer(
Weight weight,
ImpactsEnum impactsEnum, ImpactsEnum impactsEnum,
LeafSimScorer docScorer, SimScorer scorer,
NumericDocValues norms,
boolean topLevelScoringClause) { boolean topLevelScoringClause) {
postingsEnum = impactsEnum; postingsEnum = impactsEnum;
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer()); maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
if (topLevelScoringClause) { if (topLevelScoringClause) {
impactsDisi = new ImpactsDISI(impactsEnum, maxScoreCache); impactsDisi = new ImpactsDISI(impactsEnum, maxScoreCache);
iterator = impactsDisi; iterator = impactsDisi;
@ -60,7 +64,8 @@ public final class TermScorer extends Scorer {
impactsDisi = null; impactsDisi = null;
iterator = impactsEnum; iterator = impactsEnum;
} }
this.docScorer = docScorer; this.scorer = scorer;
this.norms = norms;
} }
@Override @Override
@ -80,13 +85,23 @@ public final class TermScorer extends Scorer {
@Override @Override
public float score() throws IOException { public float score() throws IOException {
assert docID() != DocIdSetIterator.NO_MORE_DOCS; var postingsEnum = this.postingsEnum;
return docScorer.score(postingsEnum.docID(), postingsEnum.freq()); var norms = this.norms;
long norm = 1L;
if (norms != null && norms.advanceExact(postingsEnum.docID())) {
norm = norms.longValue();
}
return scorer.score(postingsEnum.freq(), norm);
} }
@Override @Override
public float smoothingScore(int docId) throws IOException { public float smoothingScore(int docId) throws IOException {
return docScorer.score(docId, 0); long norm = 1L;
if (norms != null && norms.advanceExact(docId)) {
norm = norms.longValue();
}
return scorer.score(0, norm);
} }
@Override @Override

View File

@ -29,6 +29,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
@ -345,7 +346,8 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
final int maxDoc; final int maxDoc;
final Set<Long> ords = new HashSet<>(); final Set<Long> ords = new HashSet<>();
final LeafSimScorer[] sims; final SimScorer[] sims;
final NumericDocValues norms;
final int minNrShouldMatch; final int minNrShouldMatch;
double score = Float.NaN; double score = Float.NaN;
@ -356,7 +358,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
this.maxDoc = reader.maxDoc(); this.maxDoc = reader.maxDoc();
BooleanQuery bq = (BooleanQuery) weight.getQuery(); BooleanQuery bq = (BooleanQuery) weight.getQuery();
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch(); this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
this.sims = new LeafSimScorer[(int) dv.getValueCount()]; this.sims = new SimScorer[(int) dv.getValueCount()];
for (BooleanClause clause : bq.clauses()) { for (BooleanClause clause : bq.clauses()) {
assert !clause.isProhibited(); assert !clause.isProhibited();
assert !clause.isRequired(); assert !clause.isRequired();
@ -366,14 +368,14 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
boolean success = ords.add(ord); boolean success = ords.add(ord);
assert success; // no dups assert success; // no dups
TermStates ts = TermStates.build(searcher, term, true); TermStates ts = TermStates.build(searcher, term, true);
SimScorer w = sims[(int) ord] =
weight.similarity.scorer( weight.similarity.scorer(
1f, 1f,
searcher.collectionStatistics("field"), searcher.collectionStatistics("field"),
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq())); searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
sims[(int) ord] = new LeafSimScorer(w, reader, "field", true);
} }
} }
norms = reader.getNormValues("field");
} }
@Override @Override
@ -409,11 +411,15 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
continue; continue;
} }
long ord; long ord;
long norm = 1L;
if (norms != null && norms.advanceExact(currentDoc)) {
norm = norms.longValue();
}
for (int i = 0; i < dv.docValueCount(); i++) { for (int i = 0; i < dv.docValueCount(); i++) {
ord = dv.nextOrd(); ord = dv.nextOrd();
if (ords.contains(ord)) { if (ords.contains(ord)) {
currentMatched++; currentMatched++;
score += sims[(int) ord].score(currentDoc, 1); score += sims[(int) ord].score(1, norm);
} }
} }
if (currentMatched >= minNrShouldMatch) { if (currentMatched >= minNrShouldMatch) {

View File

@ -19,12 +19,12 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.search.similarities.TFIDFSimilarity;
@ -76,8 +76,7 @@ public class NormValueSource extends ValueSource {
1f, 1f,
new CollectionStatistics(field, 1, 1, 1, 1), new CollectionStatistics(field, 1, 1, 1, 1),
new TermStatistics(new BytesRef("bogus"), 1, 1)); new TermStatistics(new BytesRef("bogus"), 1, 1));
final LeafSimScorer leafSimScorer = final NumericDocValues norms = readerContext.reader().getNormValues(field);
new LeafSimScorer(simScorer, readerContext.reader(), field, true);
return new FloatDocValues(this) { return new FloatDocValues(this) {
int lastDocID = -1; int lastDocID = -1;
@ -88,7 +87,11 @@ public class NormValueSource extends ValueSource {
throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID); throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
} }
lastDocID = docID; lastDocID = docID;
return leafSimScorer.score(docID, 1f); long norm = 1L;
if (norms != null && norms.advanceExact(docID)) {
norm = norms.longValue();
}
return simScorer.score(1f, norm);
} }
}; };
} }

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
@ -32,11 +33,11 @@ import org.apache.lucene.queries.spans.Spans;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** /**
@ -190,9 +191,9 @@ public class PayloadScoreQuery extends SpanQuery {
if (spans == null) { if (spans == null) {
return null; return null;
} }
LeafSimScorer docScorer = innerWeight.getSimScorer(context); NumericDocValues norms = context.reader().getNormValues(field);
PayloadSpans payloadSpans = new PayloadSpans(spans, decoder); PayloadSpans payloadSpans = new PayloadSpans(spans, decoder);
final var scorer = new PayloadSpanScorer(payloadSpans, docScorer); final var scorer = new PayloadSpanScorer(payloadSpans, innerWeight.getSimScorer(), norms);
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }
} }
@ -248,8 +249,9 @@ public class PayloadScoreQuery extends SpanQuery {
private final PayloadSpans spans; private final PayloadSpans spans;
private PayloadSpanScorer(PayloadSpans spans, LeafSimScorer docScorer) throws IOException { private PayloadSpanScorer(PayloadSpans spans, SimScorer scorer, NumericDocValues norms)
super(spans, docScorer); throws IOException {
super(spans, scorer, norms);
this.spans = spans; this.spans = spans;
} }

View File

@ -21,6 +21,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
@ -34,7 +35,6 @@ import org.apache.lucene.queries.spans.SpanWeight;
import org.apache.lucene.queries.spans.Spans; import org.apache.lucene.queries.spans.Spans;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -191,8 +191,8 @@ public class SpanPayloadCheckQuery extends SpanQuery {
if (spans == null) { if (spans == null) {
return null; return null;
} }
final LeafSimScorer docScorer = getSimScorer(context); final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, docScorer); final var scorer = new SpanScorer(spans, getSimScorer(), norms);
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }

View File

@ -20,10 +20,10 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
@ -144,8 +144,8 @@ public final class SpanContainingQuery extends SpanContainQuery {
if (spans == null) { if (spans == null) {
return null; return null;
} }
final LeafSimScorer docScorer = getSimScorer(context); final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, docScorer); final var scorer = new SpanScorer(spans, getSimScorer(), norms);
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }
} }

View File

@ -29,7 +29,6 @@ import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -247,8 +246,8 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
if (spans == null) { if (spans == null) {
return null; return null;
} }
final LeafSimScorer docScorer = getSimScorer(context); final var scorer =
final var scorer = new SpanScorer(spans, docScorer); new SpanScorer(spans, getSimScorer(), context.reader().getNormValues(field));
return new DefaultScorerSupplier(scorer); return new DefaultScorerSupplier(scorer);
} }
} }

View File

@ -18,10 +18,11 @@ package org.apache.lucene.queries.spans;
import java.io.IOException; import java.io.IOException;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/** /**
* A basic {@link Scorer} over {@link Spans}. * A basic {@link Scorer} over {@link Spans}.
@ -31,7 +32,8 @@ import org.apache.lucene.search.TwoPhaseIterator;
public class SpanScorer extends Scorer { public class SpanScorer extends Scorer {
protected final Spans spans; protected final Spans spans;
protected final LeafSimScorer docScorer; protected final SimScorer scorer;
protected final NumericDocValues norms;
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */ /** accumulated sloppy freq (computed in setFreqCurrentDoc) */
private float freq; private float freq;
@ -39,9 +41,10 @@ public class SpanScorer extends Scorer {
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
/** Sole constructor. */ /** Sole constructor. */
public SpanScorer(Spans spans, LeafSimScorer docScorer) { public SpanScorer(Spans spans, SimScorer scorer, NumericDocValues norms) {
this.spans = Objects.requireNonNull(spans); this.spans = Objects.requireNonNull(spans);
this.docScorer = docScorer; this.scorer = scorer;
this.norms = norms;
} }
/** return the Spans for this Scorer * */ /** return the Spans for this Scorer * */
@ -69,8 +72,12 @@ public class SpanScorer extends Scorer {
* slop-adjusted {@link #freq}. * slop-adjusted {@link #freq}.
*/ */
protected float scoreCurrentDoc() throws IOException { protected float scoreCurrentDoc() throws IOException {
assert docScorer != null : getClass() + " has a null docScorer!"; assert scorer != null : getClass() + " has a null docScorer!";
return docScorer.score(docID(), freq); long norm = 1L;
if (norms != null && norms.advanceExact(docID())) {
norm = norms.longValue();
}
return scorer.score(freq, norm);
} }
/** /**
@ -98,7 +105,7 @@ public class SpanScorer extends Scorer {
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased
// endPos="+endPos; // endPos="+endPos;
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos; assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos;
if (docScorer == null) { // scores not required, break out here if (scorer == null) { // scores not required, break out here
freq = 1; freq = 1;
return; return;
} }

View File

@ -160,7 +160,7 @@ public class SpanTermQuery extends SpanQuery {
final PostingsEnum postings = final PostingsEnum postings =
termsEnum.postings(null, requiredPostings.getRequiredPostings()); termsEnum.postings(null, requiredPostings.getRequiredPostings());
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST; float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
return new TermSpans(getSimScorer(context), postings, term, positionsCost); return new TermSpans(postings, term, positionsCost);
} }
} }

View File

@ -22,13 +22,13 @@ import java.util.Comparator;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches; import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator; import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils; import org.apache.lucene.search.MatchesUtils;
@ -38,6 +38,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
/** Expert-only. Public for use by other weight implementations */ /** Expert-only. Public for use by other weight implementations */
@ -142,8 +143,8 @@ public abstract class SpanWeight extends Weight {
if (spans == null) { if (spans == null) {
return null; return null;
} }
final LeafSimScorer docScorer = getSimScorer(context); final NumericDocValues norms = context.reader().getNormValues(field);
final var scorer = new SpanScorer(spans, docScorer); final var scorer = new SpanScorer(spans, simScorer, norms);
return new ScorerSupplier() { return new ScorerSupplier() {
@Override @Override
public SpanScorer get(long leadCost) throws IOException { public SpanScorer get(long leadCost) throws IOException {
@ -157,15 +158,9 @@ public abstract class SpanWeight extends Weight {
}; };
} }
/** /** Return the SimScorer */
* Return a LeafSimScorer for this context public SimScorer getSimScorer() {
* return simScorer;
* @param context the LeafReaderContext
* @return a SimWeight
* @throws IOException on error
*/
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
} }
@Override @Override
@ -176,9 +171,13 @@ public abstract class SpanWeight extends Weight {
if (newDoc == doc) { if (newDoc == doc) {
if (simScorer != null) { if (simScorer != null) {
float freq = scorer.sloppyFreq(); float freq = scorer.sloppyFreq();
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq); Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation); NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
return Explanation.match( return Explanation.match(
scoreExplanation.getValue(), scoreExplanation.getValue(),
"weight(" "weight("

View File

@ -21,7 +21,6 @@ import java.util.Objects;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
/** /**
* Expert: Public for extension only. This does not work correctly for terms that indexed at * Expert: Public for extension only. This does not work correctly for terms that indexed at
@ -37,7 +36,7 @@ public class TermSpans extends Spans {
protected boolean readPayload; protected boolean readPayload;
private final float positionsCost; private final float positionsCost;
public TermSpans(LeafSimScorer scorer, PostingsEnum postings, Term term, float positionsCost) { public TermSpans(PostingsEnum postings, Term term, float positionsCost) {
this.postings = Objects.requireNonNull(postings); this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term); this.term = Objects.requireNonNull(term);
this.doc = -1; this.doc = -1;

View File

@ -23,8 +23,8 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
/** Wraps a SpanWeight with additional asserts */ /** Wraps a SpanWeight with additional asserts */
public class AssertingSpanWeight extends SpanWeight { public class AssertingSpanWeight extends SpanWeight {
@ -55,8 +55,8 @@ public class AssertingSpanWeight extends SpanWeight {
} }
@Override @Override
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException { public SimScorer getSimScorer() {
return in.getSimScorer(context); return in.getSimScorer();
} }
@Override @Override

View File

@ -45,7 +45,6 @@ import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches; import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
@ -402,14 +401,12 @@ public final class CombinedFieldQuery extends Query implements Accountable {
MultiNormsLeafSimScorer scoringSimScorer = MultiNormsLeafSimScorer scoringSimScorer =
new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true); new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
LeafSimScorer nonScoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
// we use termscorers + disjunction as an impl detail // we use termscorers + disjunction as an impl detail
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size()); DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
for (int i = 0; i < iterators.size(); i++) { for (int i = 0; i < iterators.size(); i++) {
float weight = fields.get(i).weight; float weight = fields.get(i).weight;
queue.add( queue.add(
new WeightedDisiWrapper(new TermScorer(iterators.get(i), nonScoringSimScorer), weight)); new WeightedDisiWrapper(new TermScorer(iterators.get(i), simWeight, null), weight));
} }
// Even though it is called approximation, it is accurate since none of // Even though it is called approximation, it is accurate since none of
// the sub iterators are two-phase iterators. // the sub iterators are two-phase iterators.

View File

@ -16,8 +16,6 @@
*/ */
package org.apache.lucene.sandbox.search; package org.apache.lucene.sandbox.search;
import static org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -27,13 +25,13 @@ import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.SmallFloat; import org.apache.lucene.util.SmallFloat;
/** /**
* Copy of {@link LeafSimScorer} that sums document's norms from multiple fields. * Scorer that sums document's norms from multiple fields.
* *
* <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also * <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
* requires that either all fields or no fields have norms enabled. Having only some fields with * requires that either all fields or no fields have norms enabled. Having only some fields with

View File

@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -35,7 +36,6 @@ import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
@ -429,9 +429,8 @@ public class TermAutomatonQuery extends Query implements Accountable {
} }
if (any) { if (any) {
scorer = NumericDocValues norms = context.reader().getNormValues(field);
new TermAutomatonScorer( scorer = new TermAutomatonScorer(this, enums, anyTermID, stats, norms);
this, enums, anyTermID, new LeafSimScorer(stats, context.reader(), field, true));
} else { } else {
return null; return null;
} }
@ -456,15 +455,20 @@ public class TermAutomatonQuery extends Query implements Accountable {
} }
float score = scorer.score(); float score = scorer.score();
LeafSimScorer leafSimScorer = ((TermAutomatonScorer) scorer).getLeafSimScorer();
EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc(); EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc();
NumericDocValues norms = context.reader().getNormValues(field);
long norm = 1L;
if (norms != null && norms.advanceExact(doc)) {
norm = norms.longValue();
}
List<Explanation> termExplanations = new ArrayList<>(); List<Explanation> termExplanations = new ArrayList<>();
for (EnumAndScorer enumAndScorer : originalSubsOnDoc) { for (EnumAndScorer enumAndScorer : originalSubsOnDoc) {
if (enumAndScorer != null) { if (enumAndScorer != null) {
PostingsEnum postingsEnum = enumAndScorer.posEnum; PostingsEnum postingsEnum = enumAndScorer.posEnum;
if (postingsEnum.docID() == doc) { if (postingsEnum.docID() == doc) {
float termScore = leafSimScorer.score(doc, postingsEnum.freq()); float termScore = stats.score(postingsEnum.freq(), norm);
termExplanations.add( termExplanations.add(
Explanation.match( Explanation.match(
postingsEnum.freq(), postingsEnum.freq(),
@ -482,7 +486,7 @@ public class TermAutomatonQuery extends Query implements Accountable {
Explanation freqExplanation = Explanation freqExplanation =
Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations); Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
return leafSimScorer.explain(doc, freqExplanation); return stats.explain(freqExplanation, norm);
} }
} }

View File

@ -17,11 +17,12 @@
package org.apache.lucene.sandbox.search; package org.apache.lucene.sandbox.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.sandbox.search.TermAutomatonQuery.EnumAndScorer; import org.apache.lucene.sandbox.search.TermAutomatonQuery.EnumAndScorer;
import org.apache.lucene.sandbox.search.TermAutomatonQuery.TermAutomatonWeight; import org.apache.lucene.sandbox.search.TermAutomatonQuery.TermAutomatonWeight;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
@ -44,7 +45,8 @@ class TermAutomatonScorer extends Scorer {
// This is -1 if wildcard (null) terms were not used, else it's the id // This is -1 if wildcard (null) terms were not used, else it's the id
// of the wildcard term: // of the wildcard term:
private final int anyTermID; private final int anyTermID;
private final LeafSimScorer docScorer; private final SimScorer scorer;
private final NumericDocValues norms;
private int numSubsOnDoc; private int numSubsOnDoc;
@ -61,11 +63,16 @@ class TermAutomatonScorer extends Scorer {
private final EnumAndScorer[] originalSubsOnDoc; private final EnumAndScorer[] originalSubsOnDoc;
public TermAutomatonScorer( public TermAutomatonScorer(
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer) TermAutomatonWeight weight,
EnumAndScorer[] subs,
int anyTermID,
SimScorer scorer,
NumericDocValues norms)
throws IOException { throws IOException {
// System.out.println(" automaton:\n" + weight.automaton.toDot()); // System.out.println(" automaton:\n" + weight.automaton.toDot());
this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length); this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length);
this.docScorer = docScorer; this.scorer = scorer;
this.norms = norms;
this.docIDQueue = new DocIDQueue(subs.length); this.docIDQueue = new DocIDQueue(subs.length);
this.posQueue = new PositionQueue(subs.length); this.posQueue = new PositionQueue(subs.length);
this.anyTermID = anyTermID; this.anyTermID = anyTermID;
@ -356,10 +363,6 @@ class TermAutomatonScorer extends Scorer {
return originalSubsOnDoc; return originalSubsOnDoc;
} }
LeafSimScorer getLeafSimScorer() {
return docScorer;
}
@Override @Override
public int docID() { public int docID() {
return docID; return docID;
@ -369,12 +372,16 @@ class TermAutomatonScorer extends Scorer {
public float score() throws IOException { public float score() throws IOException {
// TODO: we could probably do better here, e.g. look @ freqs of actual terms involved in this // TODO: we could probably do better here, e.g. look @ freqs of actual terms involved in this
// doc and score differently // doc and score differently
return docScorer.score(docID, freq); long norm = 1L;
if (norms != null && norms.advanceExact(docID)) {
norm = norms.longValue();
}
return scorer.score(freq, norm);
} }
@Override @Override
public float getMaxScore(int upTo) throws IOException { public float getMaxScore(int upTo) throws IOException {
return docScorer.getSimScorer().score(Float.MAX_VALUE, 1L); return scorer.score(Float.MAX_VALUE, 1L);
} }
static class TermRunAutomaton extends RunAutomaton { static class TermRunAutomaton extends RunAutomaton {