Assign a dummy simScorer in TermsWeight if score is not needed (#12383)

This commit is contained in:
Sagar 2023-06-30 06:14:33 -07:00 committed by GitHub
parent 223eecca33
commit 40ee6e583e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 1 deletions

View File

@ -136,6 +136,8 @@ Optimizations
* GITHUB#12361: Faster top-level disjunctions sorted by descending score. * GITHUB#12361: Faster top-level disjunctions sorted by descending score.
(Adrien Grand) (Adrien Grand)
* GITHUB#12383: Assign a dummy simScorer in TermWeight if scores are not needed. (Sagar Upadhyaya)
* GITHUB#12372: Reduce allocation during HNSW construction (Jonathan Ellis) * GITHUB#12372: Reduce allocation during HNSW construction (Jonathan Ellis)
* GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna) * GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna)

View File

@ -72,7 +72,22 @@ public class TermQuery extends Query {
if (termStats == null) { if (termStats == null) {
this.simScorer = null; // term doesn't exist in any segment, we won't use similarity at all this.simScorer = null; // term doesn't exist in any segment, we won't use similarity at all
} else { } else {
this.simScorer = similarity.scorer(boost, collectionStats, termStats); // Assigning a dummy simScorer in case score is not needed to avoid unnecessary float[]
// allocations in case default BM25Scorer is used.
// See: https://github.com/apache/lucene/issues/12297
if (scoreMode.needsScores()) {
this.simScorer = similarity.scorer(boost, collectionStats, termStats);
} else {
// Scores are not needed, so this scorer is not expected to be called; a placeholder that
// always returns 0 is assigned instead of building the real one.
this.simScorer =
new Similarity.SimScorer() {
@Override
public float score(float freq, long norm) {
return 0f;
}
};
}
} }
} }

View File

@ -17,11 +17,14 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CompositeReaderContext; import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.FilterDirectoryReader; import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
@ -32,6 +35,7 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates; import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.DummyTotalHitCountCollector; import org.apache.lucene.tests.search.DummyTotalHitCountCollector;
@ -164,6 +168,53 @@ public class TestTermQuery extends LuceneTestCase {
IOUtils.close(reader, w, dir); IOUtils.close(reader, w, dir);
} }
/**
 * Creates a {@link TermQuery} weight once per {@link ScoreMode} and checks two things: the score
 * mode handed to {@code createWeight} is the one requested, and the searcher's {@link Similarity}
 * is asked for a scorer exactly when {@link ScoreMode#needsScores()} is true.
 */
public void testWithWithDifferentScoreModes() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(), directory, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));

  // Index one document so that there is a segment containing the term foo:bar.
  Document document = new Document();
  document.add(new StringField("foo", "bar", Store.NO));
  writer.addDocument(document);
  writer.getReader().close();

  DirectoryReader indexReader = writer.getReader();
  IndexSearcher indexSearcher = new IndexSearcher(indexReader);
  // Captured once, before the loop replaces the searcher's similarity with a recording wrapper.
  final Similarity delegate = indexSearcher.getSimilarity();

  for (ScoreMode mode : ScoreMode.values()) {
    // Fresh recorders per score mode so that one iteration cannot leak into the next.
    final AtomicReference<ScoreMode> observedMode = new AtomicReference<>();
    final AtomicBoolean scorerRequested = new AtomicBoolean();

    // Forwards everything to the original similarity, but remembers whether scorer() was called.
    Similarity recordingSimilarity =
        new Similarity() {
          @Override
          public long computeNorm(FieldInvertState state) {
            return delegate.computeNorm(state);
          }

          @Override
          public SimScorer scorer(
              float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
            scorerRequested.set(true);
            return delegate.scorer(boost, collectionStats, termStats);
          }
        };
    indexSearcher.setSimilarity(recordingSimilarity);

    // Subclass that only records which score mode reaches createWeight before delegating.
    TermQuery query =
        new TermQuery(new Term("foo", "bar")) {
          @Override
          public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
              throws IOException {
            observedMode.set(scoreMode);
            return super.createWeight(searcher, scoreMode, boost);
          }
        };

    query.createWeight(indexSearcher, mode, 1f);

    assertEquals(mode, observedMode.get());
    // The similarity must be consulted if and only if this score mode needs scores.
    assertEquals(mode.needsScores(), scorerRequested.get());
  }

  IOUtils.close(indexReader, writer, directory);
}
private static class NoSeekDirectoryReader extends FilterDirectoryReader { private static class NoSeekDirectoryReader extends FilterDirectoryReader {
public NoSeekDirectoryReader(DirectoryReader in) throws IOException { public NoSeekDirectoryReader(DirectoryReader in) throws IOException {