mirror of https://github.com/apache/lucene.git
LUCENE-10582: Fix merging of CollectionStatistics in CombinedFieldQuery (#910)
CombinedFieldQuery does not properly combine overridden collection statistics, resulting in an IllegalArgumentException during searches.
This commit is contained in:
parent
318177af83
commit
e319a5223c
|
@ -88,6 +88,8 @@ Bug Fixes
|
||||||
|
|
||||||
* LUCENE-10574: Prevent pathological O(N^2) merging. (Adrien Grand)
|
* LUCENE-10574: Prevent pathological O(N^2) merging. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-10582: Fix merging of overridden CollectionStatistics in CombinedFieldQuery (Yannick Welsch)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -352,13 +352,14 @@ public final class CombinedFieldQuery extends Query implements Accountable {
|
||||||
|
|
||||||
private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher)
|
private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
long maxDoc = searcher.getIndexReader().maxDoc();
|
long maxDoc = 0;
|
||||||
long docCount = 0;
|
long docCount = 0;
|
||||||
long sumTotalTermFreq = 0;
|
long sumTotalTermFreq = 0;
|
||||||
long sumDocFreq = 0;
|
long sumDocFreq = 0;
|
||||||
for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
|
for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
|
||||||
CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field);
|
CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field);
|
||||||
if (collectionStats != null) {
|
if (collectionStats != null) {
|
||||||
|
maxDoc = Math.max(collectionStats.maxDoc(), maxDoc);
|
||||||
docCount = Math.max(collectionStats.docCount(), docCount);
|
docCount = Math.max(collectionStats.docCount(), docCount);
|
||||||
sumDocFreq = Math.max(collectionStats.sumDocFreq(), sumDocFreq);
|
sumDocFreq = Math.max(collectionStats.sumDocFreq(), sumDocFreq);
|
||||||
sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq();
|
sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq();
|
||||||
|
|
|
@ -589,4 +589,86 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
|
||||||
return new BM25Similarity().scorer(boost, collectionStats, termStats);
|
return new BM25Similarity().scorer(boost, collectionStats, termStats);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testOverrideCollectionStatistics() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig();
|
||||||
|
Similarity similarity = randomCompatibleSimilarity();
|
||||||
|
iwc.setSimilarity(similarity);
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
|
||||||
|
int numMatch = atLeast(10);
|
||||||
|
for (int i = 0; i < numMatch; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
doc.add(new TextField("a", "baz", Store.NO));
|
||||||
|
doc.add(new TextField("b", "baz", Store.NO));
|
||||||
|
for (int k = 0; k < 2; k++) {
|
||||||
|
doc.add(new TextField("ab", "baz", Store.NO));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
doc.clear();
|
||||||
|
}
|
||||||
|
int freqA = random().nextInt(5) + 1;
|
||||||
|
for (int j = 0; j < freqA; j++) {
|
||||||
|
doc.add(new TextField("a", "foo", Store.NO));
|
||||||
|
}
|
||||||
|
int freqB = random().nextInt(5) + 1;
|
||||||
|
for (int j = 0; j < freqB; j++) {
|
||||||
|
doc.add(new TextField("b", "foo", Store.NO));
|
||||||
|
}
|
||||||
|
int freqAB = freqA + freqB;
|
||||||
|
for (int j = 0; j < freqAB; j++) {
|
||||||
|
doc.add(new TextField("ab", "foo", Store.NO));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
|
||||||
|
int extraMaxDoc = randomIntBetween(0, 10);
|
||||||
|
int extraDocCount = randomIntBetween(0, extraMaxDoc);
|
||||||
|
int extraSumDocFreq = extraDocCount + randomIntBetween(0, 10);
|
||||||
|
|
||||||
|
int extraSumTotalTermFreqA = extraSumDocFreq + randomIntBetween(0, 10);
|
||||||
|
int extraSumTotalTermFreqB = extraSumDocFreq + randomIntBetween(0, 10);
|
||||||
|
int extraSumTotalTermFreqAB = extraSumTotalTermFreqA + extraSumTotalTermFreqB;
|
||||||
|
|
||||||
|
IndexSearcher searcher =
|
||||||
|
new IndexSearcher(reader) {
|
||||||
|
@Override
|
||||||
|
public CollectionStatistics collectionStatistics(String field) throws IOException {
|
||||||
|
CollectionStatistics shardStatistics = super.collectionStatistics(field);
|
||||||
|
int extraSumTotalTermFreq;
|
||||||
|
if (field.equals("a")) {
|
||||||
|
extraSumTotalTermFreq = extraSumTotalTermFreqA;
|
||||||
|
} else if (field.equals("b")) {
|
||||||
|
extraSumTotalTermFreq = extraSumTotalTermFreqB;
|
||||||
|
} else if (field.equals("ab")) {
|
||||||
|
extraSumTotalTermFreq = extraSumTotalTermFreqAB;
|
||||||
|
} else {
|
||||||
|
throw new AssertionError("should never be called");
|
||||||
|
}
|
||||||
|
return new CollectionStatistics(
|
||||||
|
field,
|
||||||
|
shardStatistics.maxDoc() + extraMaxDoc,
|
||||||
|
shardStatistics.docCount() + extraDocCount,
|
||||||
|
shardStatistics.sumTotalTermFreq() + extraSumTotalTermFreq,
|
||||||
|
shardStatistics.sumDocFreq() + extraSumDocFreq);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
searcher.setSimilarity(similarity);
|
||||||
|
CombinedFieldQuery query =
|
||||||
|
new CombinedFieldQuery.Builder()
|
||||||
|
.addField("a")
|
||||||
|
.addField("b")
|
||||||
|
.addTerm(new BytesRef("foo"))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo")));
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue