1
0
mirror of https://github.com/apache/lucene.git synced 2025-02-27 21:09:19 +00:00

LUCENE-10039: Fix single-field scoring for CombinedFieldQuery

When there's only one field, CombinedFieldQuery will ignore its weight while
scoring. This makes the scoring inconsistent, since the field weight is supposed
to multiply its term frequency.

This PR removes the optimizations around single-field scoring to make sure the
weight is always taken into account. These optimizations are not critical since
it should be uncommon to use CombinedFieldQuery with only one field.
This commit is contained in:
Julie Tibshirani 2021-07-28 15:43:56 +03:00 committed by GitHub
parent e44636c280
commit e8663b30b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 49 additions and 19 deletions
lucene
CHANGES.txt
sandbox/src
java/org/apache/lucene/sandbox/search
test/org/apache/lucene/sandbox/search

@ -418,6 +418,9 @@ Bug Fixes
* LUCENE-10026: Fix CombinedFieldQuery equals and hashCode, which ensures
query rewrites don't drop CombinedFieldQuery clauses. (Julie Tibshirani)
* LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single
field. (Julie Tibshirani)
Other
---------------------
(No changes)

@ -254,8 +254,7 @@ public final class CombinedFieldQuery extends Query implements Accountable {
@Override
public Query rewrite(IndexReader reader) throws IOException {
// optimize zero and single field cases
if (terms.length == 0) {
if (terms.length == 0 || fieldAndWeights.isEmpty()) {
return new BooleanQuery.Builder().build();
}
return this;
@ -383,14 +382,9 @@ public final class CombinedFieldQuery extends Query implements Accountable {
if (scorer != null) {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
final float freq;
if (scorer instanceof CombinedFieldScorer) {
freq = ((CombinedFieldScorer) scorer).freq();
} else {
assert scorer instanceof TermScorer;
freq = ((TermScorer) scorer).freq();
}
final MultiNormsLeafSimScorer docScorer =
assert scorer instanceof CombinedFieldScorer;
float freq = ((CombinedFieldScorer) scorer).freq();
MultiNormsLeafSimScorer docScorer =
new MultiNormsLeafSimScorer(
simWeight, context.reader(), fieldAndWeights.values(), true);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
@ -423,13 +417,7 @@ public final class CombinedFieldQuery extends Query implements Accountable {
return null;
}
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
if (iterators.size() == 1) {
final LeafSimScorer scoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), fields.get(0).field, true);
return new TermScorer(this, iterators.get(0), scoringSimScorer);
}
final MultiNormsLeafSimScorer scoringSimScorer =
MultiNormsLeafSimScorer scoringSimScorer =
new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
LeafSimScorer nonScoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);

@ -71,8 +71,6 @@ final class MultiNormsLeafSimScorer {
if (normsList.isEmpty()) {
norms = null;
} else if (normsList.size() == 1) {
norms = normsList.get(0);
} else {
final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
final float[] weightArr = new float[normsList.size()];

@ -319,6 +319,47 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
dir.close();
}
/**
 * Checks scoring of a {@link CombinedFieldQuery} that targets a single weighted field.
 *
 * <p>Every document holds the term "foo" {@code freqA} times in field "a" and {@code freqA *
 * boost} times in field "b". A combined query over "a" alone with weight {@code boost} is then
 * compared, through {@code checkExpectedHits}, against a plain {@link TermQuery} on "b"; the
 * field weight is meant to multiply the term frequency, so the two are expected to line up.
 * NOTE(review): the exact comparison performed by {@code checkExpectedHits} is not visible here
 * -- presumably it compares hit counts and scores; confirm against the helper.
 *
 * @throws IOException if indexing or searching fails
 */
public void testCopyFieldWithSingleField() throws IOException {
  // try-with-resources releases the directory, writer and reader even when an assertion or an
  // indexing error aborts the test; resources are closed in the order reader, writer, directory.
  try (Directory dir = new MMapDirectory(createTempDir())) {
    Similarity similarity = randomCompatibleSimilarity();

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setSimilarity(similarity);

    try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
      // Weight applied to field "a"; always at least 1.
      int boost = Math.max(1, random().nextInt(5));
      int numMatch = atLeast(10);
      for (int i = 0; i < numMatch; i++) {
        Document doc = new Document();

        int freqA = random().nextInt(5) + 1;
        for (int j = 0; j < freqA; j++) {
          doc.add(new TextField("a", "foo", Store.NO));
        }

        // Field "b" carries the term boost times as often as field "a".
        int freqB = freqA * boost;
        for (int j = 0; j < freqB; j++) {
          doc.add(new TextField("b", "foo", Store.NO));
        }

        w.addDocument(doc);
      }

      try (IndexReader reader = w.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        searcher.setSimilarity(similarity);

        CombinedFieldQuery query =
            new CombinedFieldQuery.Builder()
                .addField("a", (float) boost)
                .addTerm(new BytesRef("foo"))
                .build();

        checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("b", "foo")));
      }
    }
  }
}
public void testCopyFieldWithMissingFields() throws IOException {
Directory dir = new MMapDirectory(createTempDir());
Similarity similarity = randomCompatibleSimilarity();