mirror of https://github.com/apache/lucene.git
Ensure DrillSidewaysScorer calls LeafCollector#finish on all sideways-dim FacetsCollectors (#12640)
This commit is contained in:
parent
1999353d9e
commit
117e8d3435
|
@ -311,6 +311,9 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
|
||||
|
||||
* GITHUB#12640: Ensure #finish is called on all drill-sideways collectors even if one throws a
|
||||
CollectionTerminatedException (Greg Miller)
|
||||
|
||||
Build
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
|
@ -145,8 +144,10 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
}
|
||||
*/
|
||||
|
||||
try {
|
||||
if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10) {
|
||||
// System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length
|
||||
// System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" +
|
||||
// disis.length
|
||||
// + " bits.length=" + bits.length);
|
||||
// position base scorer to the first matching doc
|
||||
baseApproximation.nextDoc();
|
||||
|
@ -162,6 +163,12 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
baseIterator.nextDoc();
|
||||
doUnionScoring(acceptDocs, collector, dims);
|
||||
}
|
||||
} finally {
|
||||
// TODO: What's the right behavior when a collector throws CollectionTerminatedException?
|
||||
// Should we stop scoring immediately (what we're doing now), or should we keep scoring until
|
||||
// all collectors throw? Should users be able to specify somehow?
|
||||
finish(dims);
|
||||
}
|
||||
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
@ -199,7 +206,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
|
||||
docID = baseApproximation.nextDoc();
|
||||
}
|
||||
finish(Collections.singleton(dim));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -337,8 +343,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
|
||||
docID = baseApproximation.nextDoc();
|
||||
}
|
||||
|
||||
finish(sidewaysDims);
|
||||
}
|
||||
|
||||
private static int advanceIfBehind(int docID, DocIdSetIterator iterator) throws IOException {
|
||||
|
@ -557,7 +561,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
|
||||
nextChunkStart += CHUNK;
|
||||
}
|
||||
finish(Arrays.asList(dims));
|
||||
}
|
||||
|
||||
private void doUnionScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
|
||||
|
@ -712,8 +715,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
|
||||
nextChunkStart += CHUNK;
|
||||
}
|
||||
|
||||
finish(Arrays.asList(dims));
|
||||
}
|
||||
|
||||
private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOException {
|
||||
|
@ -765,7 +766,7 @@ class DrillSidewaysScorer extends BulkScorer {
|
|||
sidewaysCollector.collect(collectDocID);
|
||||
}
|
||||
|
||||
private void finish(Collection<DocsAndCost> dims) throws IOException {
|
||||
private void finish(DocsAndCost[] dims) throws IOException {
|
||||
// Note: We _only_ call #finish on the facets collectors we're managing here, but not the
|
||||
// "main" collector. This is because IndexSearcher handles calling #finish on the main
|
||||
// collector.
|
||||
|
|
|
@ -55,6 +55,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
|
@ -257,6 +258,81 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
|
||||
}
|
||||
|
||||
public void testCollectionTerminated() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
|
||||
Document d = new Document();
|
||||
d.add(new TextField("foo", "bar", Field.Store.NO));
|
||||
w.addDocument(d);
|
||||
|
||||
d = new Document();
|
||||
d.add(new TextField("foo", "bar", Field.Store.NO));
|
||||
w.addDocument(d);
|
||||
|
||||
d = new Document();
|
||||
d.add(new TextField("foo", "baz", Field.Store.NO));
|
||||
w.addDocument(d);
|
||||
|
||||
try (IndexReader r = w.getReader()) {
|
||||
IndexSearcher searcher = new IndexSearcher(r);
|
||||
LeafReaderContext ctx = r.leaves().get(0);
|
||||
|
||||
Query baseQuery = new MatchAllDocsQuery();
|
||||
Weight baseWeight = searcher.createWeight(baseQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
|
||||
Scorer baseScorer = baseWeight.scorer(ctx);
|
||||
|
||||
Query dimQ = new TermQuery(new Term("foo", "bar"));
|
||||
dimQ = searcher.rewrite(dimQ);
|
||||
Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
|
||||
Scorer dimScorer = dimWeight.scorer(ctx);
|
||||
|
||||
FacetsCollector baseFC = new FacetsCollector();
|
||||
FacetsCollector dimFC = new FacetsCollector();
|
||||
DrillSidewaysScorer.DocsAndCost docsAndCost =
|
||||
new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC);
|
||||
|
||||
LeafCollector baseCollector =
|
||||
new LeafCollector() {
|
||||
int counter = 0;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
// doesn't matter
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
if (++counter > 1) {
|
||||
throw new CollectionTerminatedException();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
boolean scoreSubDocsAtOnce = random().nextBoolean();
|
||||
DrillSidewaysScorer scorer =
|
||||
new DrillSidewaysScorer(
|
||||
ctx,
|
||||
baseScorer,
|
||||
baseFC,
|
||||
new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
|
||||
scoreSubDocsAtOnce);
|
||||
expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
|
||||
|
||||
// We've set things up so that our base collector with throw CollectionTerminatedException
|
||||
// after collecting the first doc. This means we'll only collect the first indexed doc for
|
||||
// both our base and sideways dim facets collectors. What we really want to test here is
|
||||
// that the matching docs are still correctly present and populated after an early
|
||||
// termination occurs (i.e., #finish is properly called in that scenario):
|
||||
assertEquals(1, baseFC.getMatchingDocs().size());
|
||||
assertEquals(1, dimFC.getMatchingDocs().size());
|
||||
FacetsCollector.MatchingDocs baseMD = baseFC.getMatchingDocs().get(0);
|
||||
FacetsCollector.MatchingDocs dimMD = dimFC.getMatchingDocs().get(0);
|
||||
assertEquals(1, baseMD.totalHits);
|
||||
assertEquals(1, dimMD.totalHits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
|
|
Loading…
Reference in New Issue