mirror of https://github.com/apache/lucene.git
Remove LeafSimScorer abstraction. (#13957)
`LeafSimScorer` is a specialization of a `SimScorer` for a given segment. It doesn't add much value, but benchmarks suggest that it adds measurable overhead to queries sorted by score.
This commit is contained in:
parent
b12ee52999
commit
a3a00f3f58
|
@ -9,6 +9,9 @@ API Changes
|
|||
---------------------
|
||||
* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)
|
||||
|
||||
* GITHUB#13957: Removed the LeafSimScorer class to avoid its overhead. Scorers now
|
||||
compute scores directly from a SimScorer, postings and norms. (Adrien Grand)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
@ -120,7 +119,6 @@ final class FeatureQuery extends Query {
|
|||
|
||||
@Override
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
final Weight thisWeight = this;
|
||||
Terms terms = Terms.getTerms(context.reader(), fieldName);
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
|
||||
|
@ -135,10 +133,8 @@ final class FeatureQuery extends Query {
|
|||
@Override
|
||||
public Scorer get(long leadCost) throws IOException {
|
||||
final SimScorer scorer = function.scorer(boost);
|
||||
final LeafSimScorer simScorer =
|
||||
new LeafSimScorer(scorer, context.reader(), fieldName, false);
|
||||
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
|
||||
return new TermScorer(thisWeight, impacts, simScorer, topLevelScoringClause);
|
||||
return new TermScorer(impacts, scorer, null, topLevelScoringClause);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
/** {@link SimScorer} on a specific {@link LeafReader}. */
|
||||
public final class LeafSimScorer {
|
||||
|
||||
private final SimScorer scorer;
|
||||
private final NumericDocValues norms;
|
||||
|
||||
/** Sole constructor: Score documents of {@code reader} with {@code scorer}. */
|
||||
public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores)
|
||||
throws IOException {
|
||||
this.scorer = Objects.requireNonNull(scorer);
|
||||
norms = needsScores ? reader.getNormValues(field) : null;
|
||||
}
|
||||
|
||||
/** Return the wrapped {@link SimScorer}. */
|
||||
public SimScorer getSimScorer() {
|
||||
return scorer;
|
||||
}
|
||||
|
||||
private long getNormValue(int doc) throws IOException {
|
||||
if (norms != null) {
|
||||
boolean found = norms.advanceExact(doc);
|
||||
assert found;
|
||||
return norms.longValue();
|
||||
} else {
|
||||
return 1L; // default norm
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Score the provided document assuming the given term document frequency. This method must be
|
||||
* called on non-decreasing sequences of doc ids.
|
||||
*
|
||||
* @see SimScorer#score(float, long)
|
||||
*/
|
||||
public float score(int doc, float freq) throws IOException {
|
||||
return scorer.score(freq, getNormValue(doc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Explain the score for the provided document assuming the given term document frequency. This
|
||||
* method must be called on non-decreasing sequences of doc ids.
|
||||
*
|
||||
* @see SimScorer#explain(Explanation, long)
|
||||
*/
|
||||
public Explanation explain(int doc, Explanation freqExpl) throws IOException {
|
||||
return scorer.explain(freqExpl, getNormValue(doc));
|
||||
}
|
||||
}
|
|
@ -18,6 +18,8 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
class PhraseScorer extends Scorer {
|
||||
|
||||
|
@ -26,16 +28,19 @@ class PhraseScorer extends Scorer {
|
|||
final MaxScoreCache maxScoreCache;
|
||||
final PhraseMatcher matcher;
|
||||
final ScoreMode scoreMode;
|
||||
private final LeafSimScorer simScorer;
|
||||
private final SimScorer simScorer;
|
||||
private final NumericDocValues norms;
|
||||
final float matchCost;
|
||||
|
||||
private float minCompetitiveScore = 0;
|
||||
private float freq = 0;
|
||||
|
||||
PhraseScorer(PhraseMatcher matcher, ScoreMode scoreMode, LeafSimScorer simScorer) {
|
||||
PhraseScorer(
|
||||
PhraseMatcher matcher, ScoreMode scoreMode, SimScorer simScorer, NumericDocValues norms) {
|
||||
this.matcher = matcher;
|
||||
this.scoreMode = scoreMode;
|
||||
this.simScorer = simScorer;
|
||||
this.norms = norms;
|
||||
this.matchCost = matcher.getMatchCost();
|
||||
this.approximation = matcher.approximation();
|
||||
this.impactsApproximation = matcher.impactsApproximation();
|
||||
|
@ -50,7 +55,11 @@ class PhraseScorer extends Scorer {
|
|||
matcher.reset();
|
||||
if (scoreMode == ScoreMode.TOP_SCORES && minCompetitiveScore > 0) {
|
||||
float maxFreq = matcher.maxFreq();
|
||||
if (simScorer.score(docID(), maxFreq) < minCompetitiveScore) {
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
if (simScorer.score(maxFreq, norm) < minCompetitiveScore) {
|
||||
// The maximum score we could get is less than the min competitive score
|
||||
return false;
|
||||
}
|
||||
|
@ -79,7 +88,11 @@ class PhraseScorer extends Scorer {
|
|||
freq += matcher.sloppyWeight();
|
||||
}
|
||||
}
|
||||
return simScorer.score(docID(), freq);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return simScorer.score(freq, norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
|
@ -63,9 +64,8 @@ public abstract class PhraseWeight extends Weight {
|
|||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
|
||||
if (matcher == null) return null;
|
||||
LeafSimScorer simScorer =
|
||||
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
|
||||
final var scorer = new PhraseScorer(matcher, scoreMode, simScorer);
|
||||
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
|
||||
final var scorer = new PhraseScorer(matcher, scoreMode, stats, norms);
|
||||
return new DefaultScorerSupplier(scorer);
|
||||
}
|
||||
|
||||
|
@ -83,10 +83,13 @@ public abstract class PhraseWeight extends Weight {
|
|||
while (matcher.nextMatch()) {
|
||||
freq += matcher.sloppyWeight();
|
||||
}
|
||||
LeafSimScorer docScorer =
|
||||
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
|
||||
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
Explanation scoreExplanation = stats.explain(freqExplanation, norm);
|
||||
return Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
"weight("
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.Impacts;
|
|||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.ImpactsSource;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SlowImpactsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -38,6 +39,7 @@ import org.apache.lucene.index.TermStates;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOSupplier;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
@ -259,9 +261,13 @@ public final class SynonymQuery extends Query {
|
|||
assert scorer instanceof TermScorer;
|
||||
freq = ((TermScorer) scorer).freq();
|
||||
}
|
||||
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
|
||||
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
Explanation scoreExplanation = simWeight.explain(freqExplanation, norm);
|
||||
return Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
"weight("
|
||||
|
@ -334,27 +340,27 @@ public final class SynonymQuery extends Query {
|
|||
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
|
||||
}
|
||||
|
||||
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
|
||||
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
|
||||
if (iterators.size() == 1) {
|
||||
final TermScorer scorer;
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
scorer = new TermScorer(impacts.get(0), simScorer);
|
||||
scorer = new TermScorer(impacts.get(0), simWeight, norms);
|
||||
} else {
|
||||
scorer = new TermScorer(iterators.get(0), simScorer);
|
||||
scorer = new TermScorer(iterators.get(0), simWeight, norms);
|
||||
}
|
||||
float boost = termBoosts.get(0);
|
||||
return scoreMode == ScoreMode.COMPLETE_NO_SCORES || boost == 1f
|
||||
? scorer
|
||||
: new FreqBoostTermScorer(boost, scorer, simScorer);
|
||||
: new FreqBoostTermScorer(boost, scorer, simWeight, norms);
|
||||
} else {
|
||||
|
||||
// we use termscorers + disjunction as an impl detail
|
||||
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
|
||||
for (int i = 0; i < iterators.size(); i++) {
|
||||
PostingsEnum postings = iterators.get(i);
|
||||
final TermScorer termScorer = new TermScorer(postings, simScorer);
|
||||
final TermScorer termScorer = new TermScorer(postings, simWeight, norms);
|
||||
float boost = termBoosts.get(i);
|
||||
final DisiWrapperFreq wrapper = new DisiWrapperFreq(termScorer, boost);
|
||||
queue.add(wrapper);
|
||||
|
@ -368,8 +374,7 @@ public final class SynonymQuery extends Query {
|
|||
boosts[i] = termBoosts.get(i);
|
||||
}
|
||||
ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]), boosts);
|
||||
MaxScoreCache maxScoreCache =
|
||||
new MaxScoreCache(impactsSource, simScorer.getSimScorer());
|
||||
MaxScoreCache maxScoreCache = new MaxScoreCache(impactsSource, simWeight);
|
||||
ImpactsDISI impactsDisi = new ImpactsDISI(iterator, maxScoreCache);
|
||||
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
|
@ -379,7 +384,7 @@ public final class SynonymQuery extends Query {
|
|||
iterator = impactsDisi;
|
||||
}
|
||||
|
||||
return new SynonymScorer(queue, iterator, impactsDisi, simScorer);
|
||||
return new SynonymScorer(queue, iterator, impactsDisi, simWeight, norms);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -575,18 +580,21 @@ public final class SynonymQuery extends Query {
|
|||
private final DocIdSetIterator iterator;
|
||||
private final MaxScoreCache maxScoreCache;
|
||||
private final ImpactsDISI impactsDisi;
|
||||
private final LeafSimScorer simScorer;
|
||||
private final SimScorer scorer;
|
||||
private final NumericDocValues norms;
|
||||
|
||||
SynonymScorer(
|
||||
DisiPriorityQueue queue,
|
||||
DocIdSetIterator iterator,
|
||||
ImpactsDISI impactsDisi,
|
||||
LeafSimScorer simScorer) {
|
||||
SimScorer scorer,
|
||||
NumericDocValues norms) {
|
||||
this.queue = queue;
|
||||
this.iterator = iterator;
|
||||
this.maxScoreCache = impactsDisi.getMaxScoreCache();
|
||||
this.impactsDisi = impactsDisi;
|
||||
this.simScorer = simScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -605,7 +613,11 @@ public final class SynonymQuery extends Query {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return simScorer.score(iterator.docID(), freq());
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(iterator.docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(freq(), norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -647,9 +659,11 @@ public final class SynonymQuery extends Query {
|
|||
private static class FreqBoostTermScorer extends FilterScorer {
|
||||
final float boost;
|
||||
final TermScorer in;
|
||||
final LeafSimScorer docScorer;
|
||||
final SimScorer scorer;
|
||||
final NumericDocValues norms;
|
||||
|
||||
public FreqBoostTermScorer(float boost, TermScorer in, LeafSimScorer docScorer) {
|
||||
public FreqBoostTermScorer(
|
||||
float boost, TermScorer in, SimScorer scorer, NumericDocValues norms) {
|
||||
super(in);
|
||||
if (Float.isNaN(boost) || Float.compare(boost, 0f) < 0 || Float.compare(boost, 1f) > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -657,7 +671,8 @@ public final class SynonymQuery extends Query {
|
|||
}
|
||||
this.boost = boost;
|
||||
this.in = in;
|
||||
this.docScorer = docScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
float freq() throws IOException {
|
||||
|
@ -666,8 +681,11 @@ public final class SynonymQuery extends Query {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
assert docID() != DocIdSetIterator.NO_MORE_DOCS;
|
||||
return docScorer.score(in.docID(), freq());
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(in.docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(freq(), norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -150,19 +151,17 @@ public class TermQuery extends Query {
|
|||
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
|
||||
}
|
||||
|
||||
LeafSimScorer scorer =
|
||||
new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
|
||||
NumericDocValues norms = null;
|
||||
if (scoreMode.needsScores()) {
|
||||
norms = context.reader().getNormValues(term.field());
|
||||
}
|
||||
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
return new TermScorer(
|
||||
TermWeight.this,
|
||||
termsEnum.impacts(PostingsEnum.FREQS),
|
||||
scorer,
|
||||
topLevelScoringClause);
|
||||
termsEnum.impacts(PostingsEnum.FREQS), simScorer, norms, topLevelScoringClause);
|
||||
} else {
|
||||
return new TermScorer(
|
||||
termsEnum.postings(
|
||||
null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE),
|
||||
scorer);
|
||||
int flags = scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE;
|
||||
return new TermScorer(termsEnum.postings(null, flags), simScorer, norms);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -223,11 +222,14 @@ public class TermQuery extends Query {
|
|||
int newDoc = scorer.iterator().advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = ((TermScorer) scorer).freq();
|
||||
LeafSimScorer docScorer =
|
||||
new LeafSimScorer(simScorer, context.reader(), term.field(), true);
|
||||
NumericDocValues norms = context.reader().getNormValues(term.field());
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
Explanation freqExplanation =
|
||||
Explanation.match(freq, "freq, occurrences of term within document");
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
|
||||
return Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
"weight("
|
||||
|
|
|
@ -18,8 +18,10 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SlowImpactsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
/**
|
||||
* Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
|
||||
|
@ -29,17 +31,19 @@ import org.apache.lucene.index.SlowImpactsEnum;
|
|||
public final class TermScorer extends Scorer {
|
||||
private final PostingsEnum postingsEnum;
|
||||
private final DocIdSetIterator iterator;
|
||||
private final LeafSimScorer docScorer;
|
||||
private final SimScorer scorer;
|
||||
private final NumericDocValues norms;
|
||||
private final ImpactsDISI impactsDisi;
|
||||
private final MaxScoreCache maxScoreCache;
|
||||
|
||||
/** Construct a {@link TermScorer} that will iterate all documents. */
|
||||
public TermScorer(PostingsEnum postingsEnum, LeafSimScorer docScorer) {
|
||||
public TermScorer(PostingsEnum postingsEnum, SimScorer scorer, NumericDocValues norms) {
|
||||
iterator = this.postingsEnum = postingsEnum;
|
||||
ImpactsEnum impactsEnum = new SlowImpactsEnum(postingsEnum);
|
||||
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
|
||||
maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
|
||||
impactsDisi = null;
|
||||
this.docScorer = docScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -47,12 +51,12 @@ public final class TermScorer extends Scorer {
|
|||
* documents.
|
||||
*/
|
||||
public TermScorer(
|
||||
Weight weight,
|
||||
ImpactsEnum impactsEnum,
|
||||
LeafSimScorer docScorer,
|
||||
SimScorer scorer,
|
||||
NumericDocValues norms,
|
||||
boolean topLevelScoringClause) {
|
||||
postingsEnum = impactsEnum;
|
||||
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
|
||||
maxScoreCache = new MaxScoreCache(impactsEnum, scorer);
|
||||
if (topLevelScoringClause) {
|
||||
impactsDisi = new ImpactsDISI(impactsEnum, maxScoreCache);
|
||||
iterator = impactsDisi;
|
||||
|
@ -60,7 +64,8 @@ public final class TermScorer extends Scorer {
|
|||
impactsDisi = null;
|
||||
iterator = impactsEnum;
|
||||
}
|
||||
this.docScorer = docScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -80,13 +85,23 @@ public final class TermScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
assert docID() != DocIdSetIterator.NO_MORE_DOCS;
|
||||
return docScorer.score(postingsEnum.docID(), postingsEnum.freq());
|
||||
var postingsEnum = this.postingsEnum;
|
||||
var norms = this.norms;
|
||||
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(postingsEnum.docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(postingsEnum.freq(), norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float smoothingScore(int docId) throws IOException {
|
||||
return docScorer.score(docId, 0);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docId)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(0, norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
|
@ -345,7 +346,8 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
final int maxDoc;
|
||||
|
||||
final Set<Long> ords = new HashSet<>();
|
||||
final LeafSimScorer[] sims;
|
||||
final SimScorer[] sims;
|
||||
final NumericDocValues norms;
|
||||
final int minNrShouldMatch;
|
||||
|
||||
double score = Float.NaN;
|
||||
|
@ -356,7 +358,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
this.maxDoc = reader.maxDoc();
|
||||
BooleanQuery bq = (BooleanQuery) weight.getQuery();
|
||||
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
|
||||
this.sims = new LeafSimScorer[(int) dv.getValueCount()];
|
||||
this.sims = new SimScorer[(int) dv.getValueCount()];
|
||||
for (BooleanClause clause : bq.clauses()) {
|
||||
assert !clause.isProhibited();
|
||||
assert !clause.isRequired();
|
||||
|
@ -366,14 +368,14 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
boolean success = ords.add(ord);
|
||||
assert success; // no dups
|
||||
TermStates ts = TermStates.build(searcher, term, true);
|
||||
SimScorer w =
|
||||
sims[(int) ord] =
|
||||
weight.similarity.scorer(
|
||||
1f,
|
||||
searcher.collectionStatistics("field"),
|
||||
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
|
||||
sims[(int) ord] = new LeafSimScorer(w, reader, "field", true);
|
||||
}
|
||||
}
|
||||
norms = reader.getNormValues("field");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -409,11 +411,15 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
continue;
|
||||
}
|
||||
long ord;
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(currentDoc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
for (int i = 0; i < dv.docValueCount(); i++) {
|
||||
ord = dv.nextOrd();
|
||||
if (ords.contains(ord)) {
|
||||
currentMatched++;
|
||||
score += sims[(int) ord].score(currentDoc, 1);
|
||||
score += sims[(int) ord].score(1, norm);
|
||||
}
|
||||
}
|
||||
if (currentMatched >= minNrShouldMatch) {
|
||||
|
|
|
@ -19,12 +19,12 @@ package org.apache.lucene.queries.function.valuesource;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
|
@ -76,8 +76,7 @@ public class NormValueSource extends ValueSource {
|
|||
1f,
|
||||
new CollectionStatistics(field, 1, 1, 1, 1),
|
||||
new TermStatistics(new BytesRef("bogus"), 1, 1));
|
||||
final LeafSimScorer leafSimScorer =
|
||||
new LeafSimScorer(simScorer, readerContext.reader(), field, true);
|
||||
final NumericDocValues norms = readerContext.reader().getNormValues(field);
|
||||
|
||||
return new FloatDocValues(this) {
|
||||
int lastDocID = -1;
|
||||
|
@ -88,7 +87,11 @@ public class NormValueSource extends ValueSource {
|
|||
throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
|
||||
}
|
||||
lastDocID = docID;
|
||||
return leafSimScorer.score(docID, 1f);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docID)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return simScorer.score(1f, norm);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
|
@ -32,11 +33,11 @@ import org.apache.lucene.queries.spans.Spans;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -190,9 +191,9 @@ public class PayloadScoreQuery extends SpanQuery {
|
|||
if (spans == null) {
|
||||
return null;
|
||||
}
|
||||
LeafSimScorer docScorer = innerWeight.getSimScorer(context);
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
PayloadSpans payloadSpans = new PayloadSpans(spans, decoder);
|
||||
final var scorer = new PayloadSpanScorer(payloadSpans, docScorer);
|
||||
final var scorer = new PayloadSpanScorer(payloadSpans, innerWeight.getSimScorer(), norms);
|
||||
return new DefaultScorerSupplier(scorer);
|
||||
}
|
||||
}
|
||||
|
@ -248,8 +249,9 @@ public class PayloadScoreQuery extends SpanQuery {
|
|||
|
||||
private final PayloadSpans spans;
|
||||
|
||||
private PayloadSpanScorer(PayloadSpans spans, LeafSimScorer docScorer) throws IOException {
|
||||
super(spans, docScorer);
|
||||
private PayloadSpanScorer(PayloadSpans spans, SimScorer scorer, NumericDocValues norms)
|
||||
throws IOException {
|
||||
super(spans, scorer, norms);
|
||||
this.spans = spans;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
|
@ -34,7 +35,6 @@ import org.apache.lucene.queries.spans.SpanWeight;
|
|||
import org.apache.lucene.queries.spans.Spans;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
@ -191,8 +191,8 @@ public class SpanPayloadCheckQuery extends SpanQuery {
|
|||
if (spans == null) {
|
||||
return null;
|
||||
}
|
||||
final LeafSimScorer docScorer = getSimScorer(context);
|
||||
final var scorer = new SpanScorer(spans, docScorer);
|
||||
final NumericDocValues norms = context.reader().getNormValues(field);
|
||||
final var scorer = new SpanScorer(spans, getSimScorer(), norms);
|
||||
return new DefaultScorerSupplier(scorer);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,10 +20,10 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
|
||||
|
@ -144,8 +144,8 @@ public final class SpanContainingQuery extends SpanContainQuery {
|
|||
if (spans == null) {
|
||||
return null;
|
||||
}
|
||||
final LeafSimScorer docScorer = getSimScorer(context);
|
||||
final var scorer = new SpanScorer(spans, docScorer);
|
||||
final NumericDocValues norms = context.reader().getNormValues(field);
|
||||
final var scorer = new SpanScorer(spans, getSimScorer(), norms);
|
||||
return new DefaultScorerSupplier(scorer);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.index.TermStates;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
@ -247,8 +246,8 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
if (spans == null) {
|
||||
return null;
|
||||
}
|
||||
final LeafSimScorer docScorer = getSimScorer(context);
|
||||
final var scorer = new SpanScorer(spans, docScorer);
|
||||
final var scorer =
|
||||
new SpanScorer(spans, getSimScorer(), context.reader().getNormValues(field));
|
||||
return new DefaultScorerSupplier(scorer);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,10 +18,11 @@ package org.apache.lucene.queries.spans;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
/**
|
||||
* A basic {@link Scorer} over {@link Spans}.
|
||||
|
@ -31,7 +32,8 @@ import org.apache.lucene.search.TwoPhaseIterator;
|
|||
public class SpanScorer extends Scorer {
|
||||
|
||||
protected final Spans spans;
|
||||
protected final LeafSimScorer docScorer;
|
||||
protected final SimScorer scorer;
|
||||
protected final NumericDocValues norms;
|
||||
|
||||
/** accumulated sloppy freq (computed in setFreqCurrentDoc) */
|
||||
private float freq;
|
||||
|
@ -39,9 +41,10 @@ public class SpanScorer extends Scorer {
|
|||
private int lastScoredDoc = -1; // last doc we called setFreqCurrentDoc() for
|
||||
|
||||
/** Sole constructor. */
|
||||
public SpanScorer(Spans spans, LeafSimScorer docScorer) {
|
||||
public SpanScorer(Spans spans, SimScorer scorer, NumericDocValues norms) {
|
||||
this.spans = Objects.requireNonNull(spans);
|
||||
this.docScorer = docScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
/** return the Spans for this Scorer * */
|
||||
|
@ -69,8 +72,12 @@ public class SpanScorer extends Scorer {
|
|||
* slop-adjusted {@link #freq}.
|
||||
*/
|
||||
protected float scoreCurrentDoc() throws IOException {
|
||||
assert docScorer != null : getClass() + " has a null docScorer!";
|
||||
return docScorer.score(docID(), freq);
|
||||
assert scorer != null : getClass() + " has a null docScorer!";
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docID())) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(freq, norm);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -98,7 +105,7 @@ public class SpanScorer extends Scorer {
|
|||
// assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased
|
||||
// endPos="+endPos;
|
||||
assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos=" + endPos;
|
||||
if (docScorer == null) { // scores not required, break out here
|
||||
if (scorer == null) { // scores not required, break out here
|
||||
freq = 1;
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -160,7 +160,7 @@ public class SpanTermQuery extends SpanQuery {
|
|||
final PostingsEnum postings =
|
||||
termsEnum.postings(null, requiredPostings.getRequiredPostings());
|
||||
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
|
||||
return new TermSpans(getSimScorer(context), postings, term, positionsCost);
|
||||
return new TermSpans(postings, term, positionsCost);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,13 +22,13 @@ import java.util.Comparator;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Matches;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
|
@ -38,6 +38,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/** Expert-only. Public for use by other weight implementations */
|
||||
|
@ -142,8 +143,8 @@ public abstract class SpanWeight extends Weight {
|
|||
if (spans == null) {
|
||||
return null;
|
||||
}
|
||||
final LeafSimScorer docScorer = getSimScorer(context);
|
||||
final var scorer = new SpanScorer(spans, docScorer);
|
||||
final NumericDocValues norms = context.reader().getNormValues(field);
|
||||
final var scorer = new SpanScorer(spans, simScorer, norms);
|
||||
return new ScorerSupplier() {
|
||||
@Override
|
||||
public SpanScorer get(long leadCost) throws IOException {
|
||||
|
@ -157,15 +158,9 @@ public abstract class SpanWeight extends Weight {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a LeafSimScorer for this context
|
||||
*
|
||||
* @param context the LeafReaderContext
|
||||
* @return a SimWeight
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
|
||||
return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
|
||||
/** Return the SimScorer */
|
||||
public SimScorer getSimScorer() {
|
||||
return simScorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -176,9 +171,13 @@ public abstract class SpanWeight extends Weight {
|
|||
if (newDoc == doc) {
|
||||
if (simScorer != null) {
|
||||
float freq = scorer.sloppyFreq();
|
||||
LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
|
||||
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
Explanation scoreExplanation = simScorer.explain(freqExplanation, norm);
|
||||
return Explanation.match(
|
||||
scoreExplanation.getValue(),
|
||||
"weight("
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
|
||||
/**
|
||||
* Expert: Public for extension only. This does not work correctly for terms that indexed at
|
||||
|
@ -37,7 +36,7 @@ public class TermSpans extends Spans {
|
|||
protected boolean readPayload;
|
||||
private final float positionsCost;
|
||||
|
||||
public TermSpans(LeafSimScorer scorer, PostingsEnum postings, Term term, float positionsCost) {
|
||||
public TermSpans(PostingsEnum postings, Term term, float positionsCost) {
|
||||
this.postings = Objects.requireNonNull(postings);
|
||||
this.term = Objects.requireNonNull(term);
|
||||
this.doc = -1;
|
||||
|
|
|
@ -23,8 +23,8 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
|
||||
/** Wraps a SpanWeight with additional asserts */
|
||||
public class AssertingSpanWeight extends SpanWeight {
|
||||
|
@ -55,8 +55,8 @@ public class AssertingSpanWeight extends SpanWeight {
|
|||
}
|
||||
|
||||
@Override
|
||||
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
|
||||
return in.getSimScorer(context);
|
||||
public SimScorer getSimScorer() {
|
||||
return in.getSimScorer();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -45,7 +45,6 @@ import org.apache.lucene.search.DisjunctionDISIApproximation;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Matches;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
|
@ -402,14 +401,12 @@ public final class CombinedFieldQuery extends Query implements Accountable {
|
|||
|
||||
MultiNormsLeafSimScorer scoringSimScorer =
|
||||
new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
|
||||
LeafSimScorer nonScoringSimScorer =
|
||||
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
|
||||
// we use termscorers + disjunction as an impl detail
|
||||
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
|
||||
for (int i = 0; i < iterators.size(); i++) {
|
||||
float weight = fields.get(i).weight;
|
||||
queue.add(
|
||||
new WeightedDisiWrapper(new TermScorer(iterators.get(i), nonScoringSimScorer), weight));
|
||||
new WeightedDisiWrapper(new TermScorer(iterators.get(i), simWeight, null), weight));
|
||||
}
|
||||
// Even though it is called approximation, it is accurate since none of
|
||||
// the sub iterators are two-phase iterators.
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
*/
|
||||
package org.apache.lucene.sandbox.search;
|
||||
|
||||
import static org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -27,13 +25,13 @@ import java.util.Objects;
|
|||
import java.util.Set;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
/**
|
||||
* Copy of {@link LeafSimScorer} that sums document's norms from multiple fields.
|
||||
* Scorer that sums document's norms from multiple fields.
|
||||
*
|
||||
* <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
|
||||
* requires that either all fields or no fields have norms enabled. Having only some fields with
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -35,7 +36,6 @@ import org.apache.lucene.queries.spans.SpanNearQuery;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
|
@ -429,9 +429,8 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
|||
}
|
||||
|
||||
if (any) {
|
||||
scorer =
|
||||
new TermAutomatonScorer(
|
||||
this, enums, anyTermID, new LeafSimScorer(stats, context.reader(), field, true));
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
scorer = new TermAutomatonScorer(this, enums, anyTermID, stats, norms);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
@ -456,15 +455,20 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
|||
}
|
||||
|
||||
float score = scorer.score();
|
||||
LeafSimScorer leafSimScorer = ((TermAutomatonScorer) scorer).getLeafSimScorer();
|
||||
EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc();
|
||||
|
||||
NumericDocValues norms = context.reader().getNormValues(field);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(doc)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
|
||||
List<Explanation> termExplanations = new ArrayList<>();
|
||||
for (EnumAndScorer enumAndScorer : originalSubsOnDoc) {
|
||||
if (enumAndScorer != null) {
|
||||
PostingsEnum postingsEnum = enumAndScorer.posEnum;
|
||||
if (postingsEnum.docID() == doc) {
|
||||
float termScore = leafSimScorer.score(doc, postingsEnum.freq());
|
||||
float termScore = stats.score(postingsEnum.freq(), norm);
|
||||
termExplanations.add(
|
||||
Explanation.match(
|
||||
postingsEnum.freq(),
|
||||
|
@ -482,7 +486,7 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
|||
|
||||
Explanation freqExplanation =
|
||||
Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
|
||||
return leafSimScorer.explain(doc, freqExplanation);
|
||||
return stats.explain(freqExplanation, norm);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,11 +17,12 @@
|
|||
package org.apache.lucene.sandbox.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.sandbox.search.TermAutomatonQuery.EnumAndScorer;
|
||||
import org.apache.lucene.sandbox.search.TermAutomatonQuery.TermAutomatonWeight;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.LeafSimScorer;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
@ -44,7 +45,8 @@ class TermAutomatonScorer extends Scorer {
|
|||
// This is -1 if wildcard (null) terms were not used, else it's the id
|
||||
// of the wildcard term:
|
||||
private final int anyTermID;
|
||||
private final LeafSimScorer docScorer;
|
||||
private final SimScorer scorer;
|
||||
private final NumericDocValues norms;
|
||||
|
||||
private int numSubsOnDoc;
|
||||
|
||||
|
@ -61,11 +63,16 @@ class TermAutomatonScorer extends Scorer {
|
|||
private final EnumAndScorer[] originalSubsOnDoc;
|
||||
|
||||
public TermAutomatonScorer(
|
||||
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
|
||||
TermAutomatonWeight weight,
|
||||
EnumAndScorer[] subs,
|
||||
int anyTermID,
|
||||
SimScorer scorer,
|
||||
NumericDocValues norms)
|
||||
throws IOException {
|
||||
// System.out.println(" automaton:\n" + weight.automaton.toDot());
|
||||
this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length);
|
||||
this.docScorer = docScorer;
|
||||
this.scorer = scorer;
|
||||
this.norms = norms;
|
||||
this.docIDQueue = new DocIDQueue(subs.length);
|
||||
this.posQueue = new PositionQueue(subs.length);
|
||||
this.anyTermID = anyTermID;
|
||||
|
@ -356,10 +363,6 @@ class TermAutomatonScorer extends Scorer {
|
|||
return originalSubsOnDoc;
|
||||
}
|
||||
|
||||
LeafSimScorer getLeafSimScorer() {
|
||||
return docScorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docID;
|
||||
|
@ -369,12 +372,16 @@ class TermAutomatonScorer extends Scorer {
|
|||
public float score() throws IOException {
|
||||
// TODO: we could probably do better here, e.g. look @ freqs of actual terms involved in this
|
||||
// doc and score differently
|
||||
return docScorer.score(docID, freq);
|
||||
long norm = 1L;
|
||||
if (norms != null && norms.advanceExact(docID)) {
|
||||
norm = norms.longValue();
|
||||
}
|
||||
return scorer.score(freq, norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
return docScorer.getSimScorer().score(Float.MAX_VALUE, 1L);
|
||||
return scorer.score(Float.MAX_VALUE, 1L);
|
||||
}
|
||||
|
||||
static class TermRunAutomaton extends RunAutomaton {
|
||||
|
|
Loading…
Reference in New Issue