diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a3a144db849..bc39d99f739 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -180,7 +180,7 @@ New Features
 
 * GITHUB#12582: Add int8 scalar quantization to the HNSW vector format. This optionally allows for more
   compact lossy storage for the vectors, requiring about 75% memory for fast HNSW search. (Ben Trent)
-  
+
 * GITHUB#12660: HNSW graph now can be merged with multiple thread. Configurable in Lucene99HnswVectorsFormat.
   (Patrick Zhai)
 
@@ -311,13 +311,16 @@ Bug Fixes
 
 * GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
 
+* GITHUB#12640: Ensure #finish is called on all drill-sideways collectors even if one throws a
+  CollectionTerminatedException (Greg Miller)
+
 Build
 ---------------------
 
 * GITHUB#12752: tests.multiplier could be omitted in test failure reproduce lines (esp. in nightly
   mode). (Dawid Weiss)
 
-* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed 
+* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
   leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
 
 * GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
index cddc0ef69bc..f302093f2f2 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
@@ -18,7 +18,6 @@ package org.apache.lucene.facet;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -145,22 +144,30 @@ class DrillSidewaysScorer extends BulkScorer {
     }
     */
 
-    if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10) {
-      // System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length
-      // + " bits.length=" + bits.length);
-      // position base scorer to the first matching doc
-      baseApproximation.nextDoc();
-      doQueryFirstScoring(acceptDocs, collector, dims);
-    } else if (numDims > 1 && drillDownAdvancedCost < baseQueryCost / 10) {
-      // System.out.println("drillDownAdvance");
-      // position base scorer to the first matching doc
-      baseIterator.nextDoc();
-      doDrillDownAdvanceScoring(acceptDocs, collector, dims);
-    } else {
-      // System.out.println("union");
-      // position base scorer to the first matching doc
-      baseIterator.nextDoc();
-      doUnionScoring(acceptDocs, collector, dims);
+    try {
+      if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10) {
+        // System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" +
+        // disis.length
+        // + " bits.length=" + bits.length);
+        // position base scorer to the first matching doc
+        baseApproximation.nextDoc();
+        doQueryFirstScoring(acceptDocs, collector, dims);
+      } else if (numDims > 1 && drillDownAdvancedCost < baseQueryCost / 10) {
+        // System.out.println("drillDownAdvance");
+        // position base scorer to the first matching doc
+        baseIterator.nextDoc();
+        doDrillDownAdvanceScoring(acceptDocs, collector, dims);
+      } else {
+        // System.out.println("union");
+        // position base scorer to the first matching doc
+        baseIterator.nextDoc();
+        doUnionScoring(acceptDocs, collector, dims);
+      }
+    } finally {
+      // TODO: What's the right behavior when a collector throws CollectionTerminatedException?
+      // Should we stop scoring immediately (what we're doing now), or should we keep scoring until
+      // all collectors throw? Should users be able to specify somehow?
+      finish(dims);
     }
 
     return Integer.MAX_VALUE;
@@ -199,7 +206,6 @@ class DrillSidewaysScorer extends BulkScorer {
 
       docID = baseApproximation.nextDoc();
     }
-    finish(Collections.singleton(dim));
   }
 
   /**
@@ -337,8 +343,6 @@ class DrillSidewaysScorer extends BulkScorer {
 
       docID = baseApproximation.nextDoc();
     }
-
-    finish(sidewaysDims);
   }
 
   private static int advanceIfBehind(int docID, DocIdSetIterator iterator) throws IOException {
@@ -557,7 +561,6 @@ class DrillSidewaysScorer extends BulkScorer {
 
       nextChunkStart += CHUNK;
     }
-    finish(Arrays.asList(dims));
   }
 
   private void doUnionScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
@@ -712,8 +715,6 @@ class DrillSidewaysScorer extends BulkScorer {
 
      nextChunkStart += CHUNK;
     }
-
-    finish(Arrays.asList(dims));
   }
 
   private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOException {
@@ -765,7 +766,7 @@ class DrillSidewaysScorer extends BulkScorer {
     sidewaysCollector.collect(collectDocID);
   }
 
-  private void finish(Collection<DocsAndCost> dims) throws IOException {
+  private void finish(DocsAndCost[] dims) throws IOException {
     // Note: We _only_ call #finish on the facets collectors we're managing here, but not the
     // "main" collector. This is because IndexSearcher handles calling #finish on the main
    // collector.
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 1bc40fea5c7..03a68d4a003 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -55,6 +55,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.ConstantScoreScorer;
@@ -257,6 +258,81 @@ public class TestDrillSideways extends FacetTestCase {
     IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
   }
 
+  public void testCollectionTerminated() throws Exception {
+    try (Directory dir = newDirectory();
+        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+      Document d = new Document();
+      d.add(new TextField("foo", "bar", Field.Store.NO));
+      w.addDocument(d);
+
+      d = new Document();
+      d.add(new TextField("foo", "bar", Field.Store.NO));
+      w.addDocument(d);
+
+      d = new Document();
+      d.add(new TextField("foo", "baz", Field.Store.NO));
+      w.addDocument(d);
+
+      try (IndexReader r = w.getReader()) {
+        IndexSearcher searcher = new IndexSearcher(r);
+        LeafReaderContext ctx = r.leaves().get(0);
+
+        Query baseQuery = new MatchAllDocsQuery();
+        Weight baseWeight = searcher.createWeight(baseQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
+        Scorer baseScorer = baseWeight.scorer(ctx);
+
+        Query dimQ = new TermQuery(new Term("foo", "bar"));
+        dimQ = searcher.rewrite(dimQ);
+        Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
+        Scorer dimScorer = dimWeight.scorer(ctx);
+
+        FacetsCollector baseFC = new FacetsCollector();
+        FacetsCollector dimFC = new FacetsCollector();
+        DrillSidewaysScorer.DocsAndCost docsAndCost =
+            new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC);
+
+        LeafCollector baseCollector =
+            new LeafCollector() {
+              int counter = 0;
+
+              @Override
+              public void setScorer(Scorable scorer) throws IOException {
+                // doesn't matter
+              }
+
+              @Override
+              public void collect(int doc) throws IOException {
+                if (++counter > 1) {
+                  throw new CollectionTerminatedException();
+                }
+              }
+            };
+
+        boolean scoreSubDocsAtOnce = random().nextBoolean();
+        DrillSidewaysScorer scorer =
+            new DrillSidewaysScorer(
+                ctx,
+                baseScorer,
+                baseFC,
+                new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
+                scoreSubDocsAtOnce);
+        expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
+
+        // We've set things up so that our base collector will throw CollectionTerminatedException
+        // after collecting the first doc. This means we'll only collect the first indexed doc for
+        // both our base and sideways dim facets collectors. What we really want to test here is
+        // that the matching docs are still correctly present and populated after an early
+        // termination occurs (i.e., #finish is properly called in that scenario):
+        assertEquals(1, baseFC.getMatchingDocs().size());
+        assertEquals(1, dimFC.getMatchingDocs().size());
+        FacetsCollector.MatchingDocs baseMD = baseFC.getMatchingDocs().get(0);
+        FacetsCollector.MatchingDocs dimMD = dimFC.getMatchingDocs().get(0);
+        assertEquals(1, baseMD.totalHits);
+        assertEquals(1, dimMD.totalHits);
+      }
+    }
+  }
+
   public void testBasic() throws Exception {
     Directory dir = newDirectory();
     Directory taxoDir = newDirectory();
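
Note for readers skimming the patch: the core of the change is the try/finally wrapped around the scoring dispatch in DrillSidewaysScorer#score, which guarantees that the managed facets collectors get their #finish callback even when a collector ends a segment early by throwing CollectionTerminatedException. The sketch below only illustrates that pattern under assumed names (FinishOnEarlyTerminationSketch, SidewaysCollector, and doScoring are hypothetical stand-ins, not Lucene API); the real method dispatches to doQueryFirstScoring, doDrillDownAdvanceScoring, or doUnionScoring rather than a single doScoring call.

    import java.io.IOException;

    class FinishOnEarlyTerminationSketch {

      /** Hypothetical stand-in for the per-dimension sideways collectors. */
      interface SidewaysCollector {
        void collect(int doc) throws IOException;

        /** Must run once per segment, even when collection stops early. */
        void finish() throws IOException;
      }

      int score(SidewaysCollector[] dims) throws IOException {
        try {
          // Stand-in for the doQueryFirstScoring / doDrillDownAdvanceScoring /
          // doUnionScoring dispatch; any of these may end the segment early by
          // throwing (in Lucene, a CollectionTerminatedException).
          doScoring(dims);
        } finally {
          // Mirrors the finish(dims) call the patch moves into a finally block:
          // every managed collector is notified, so whatever was collected before
          // the early termination still gets recorded.
          for (SidewaysCollector dim : dims) {
            dim.finish();
          }
        }
        return Integer.MAX_VALUE;
      }

      private void doScoring(SidewaysCollector[] dims) throws IOException {
        // Body intentionally left empty in this sketch.
      }
    }

As the Note comment in the patch points out, the scorer only finishes the facets collectors it manages itself; the "main" collector passed into score() is still finished by IndexSearcher in a normal search, which is why the new test only asserts on the two FacetsCollectors it constructs directly.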