LUCENE-9988: Fix DrillSideways bug discovered in randomized testing (#167)

This commit is contained in:
Greg Miller 2021-06-03 15:03:09 -07:00 committed by GitHub
parent efb7b2a5e8
commit 7a7003c51c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 2 deletions

View File

@@ -293,6 +293,8 @@ Bug fixes
* LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. Before, rewriting
could slightly change the scoring when weights were specified. (Naoto Minami via Julie Tibshirani)
* LUCENE-9988: Fix DrillSideways correctness bug introduced in LUCENE-9944 (Greg Miller)
Changes in Backwards Compatibility Policy
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top

View File

@@ -65,11 +65,34 @@ class DrillSidewaysQuery extends Query {
FacetsCollectorManager[] drillSidewaysCollectorManagers, FacetsCollectorManager[] drillSidewaysCollectorManagers,
Query[] drillDownQueries, Query[] drillDownQueries,
boolean scoreSubDocsAtOnce) { boolean scoreSubDocsAtOnce) {
this(
baseQuery,
drillDownCollectorManager,
drillSidewaysCollectorManagers,
new ArrayList<>(),
new ArrayList<>(),
drillDownQueries,
scoreSubDocsAtOnce);
}
/**
 * Needed for {@link #rewrite(IndexReader)}. Ensures the same "managed" lists get used since
 * {@link DrillSideways} accesses references to these through the original {@code
 * DrillSidewaysQuery}.
 *
 * @param baseQuery the base query; must not be {@code null}
 * @param drillDownCollectorManager collector manager for the drill-down results
 * @param drillSidewaysCollectorManagers collector managers for the drill-sideways results
 * @param managedDrillDownCollectors list stored by reference (not copied), so a rewritten query
 *     keeps sharing it with the query it was rewritten from
 * @param managedDrillSidewaysCollectors list stored by reference (not copied), for the same
 *     reason as {@code managedDrillDownCollectors}
 * @param drillDownQueries the drill-down queries
 * @param scoreSubDocsAtOnce stored as-is on the query
 * @throws NullPointerException if {@code baseQuery} is {@code null}
 */
private DrillSidewaysQuery(
    Query baseQuery,
    FacetsCollectorManager drillDownCollectorManager,
    FacetsCollectorManager[] drillSidewaysCollectorManagers,
    List<FacetsCollector> managedDrillDownCollectors,
    List<FacetsCollector[]> managedDrillSidewaysCollectors,
    Query[] drillDownQueries,
    boolean scoreSubDocsAtOnce) {
  this.baseQuery = Objects.requireNonNull(baseQuery);
  this.drillDownCollectorManager = drillDownCollectorManager;
  this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers;
  // Keep the caller's list instances: allocating fresh lists here would detach a rewritten
  // query from the lists the original query exposes.
  this.managedDrillDownCollectors = managedDrillDownCollectors;
  this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors;
  this.drillDownQueries = drillDownQueries;
  this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@@ -96,6 +119,8 @@ class DrillSidewaysQuery extends Query {
newQuery, newQuery,
drillDownCollectorManager, drillDownCollectorManager,
drillSidewaysCollectorManagers, drillSidewaysCollectorManagers,
managedDrillDownCollectors,
managedDrillSidewaysCollectors,
drillDownQueries, drillDownQueries,
scoreSubDocsAtOnce); scoreSubDocsAtOnce);
} }

View File

@@ -358,26 +358,31 @@ public class TestDrillSideways extends FacetTestCase {
RandomIndexWriter writer = new RandomIndexWriter(random(), dir); RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document(); Document doc = new Document();
doc.add(newStringField("content", "foo", Field.Store.NO));
doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15")); doc.add(new FacetField("Publish Date", "2010", "10", "15"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
doc = new Document(); doc = new Document();
doc.add(newStringField("content", "foo", Field.Store.NO));
doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010", "10", "20")); doc.add(new FacetField("Publish Date", "2010", "10", "20"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
doc = new Document(); doc = new Document();
doc.add(newStringField("content", "foo", Field.Store.NO));
doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1")); doc.add(new FacetField("Publish Date", "2012", "1", "1"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
doc = new Document(); doc = new Document();
doc.add(newStringField("content", "bar", Field.Store.NO));
doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7")); doc.add(new FacetField("Publish Date", "2012", "1", "7"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
doc = new Document(); doc = new Document();
doc.add(newStringField("content", "bar", Field.Store.NO));
doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5")); doc.add(new FacetField("Publish Date", "1999", "5", "5"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
@@ -581,6 +586,27 @@ public class TestDrillSideways extends FacetTestCase {
// Expect null facets since we provided a null FacetsCollectorManager // Expect null facets since we provided a null FacetsCollectorManager
assertNull(r.facets); assertNull(r.facets);
// Test the case where the base query rewrites itself. See LUCENE-9988:
Query baseQuery =
new TermQuery(new Term("content", "foo")) {
@Override
public Query rewrite(IndexReader reader) {
// return a new instance, forcing the DrillDownQuery to also rewrite itself, exposing
// the bug in LUCENE-9988:
return new TermQuery(getTerm());
}
};
ddq = new DrillDownQuery(config, baseQuery);
ddq.add("Author", "Lisa");
r = ds.search(ddq, manager);
assertEquals(2, r.collectorResult.size());
assertEquals(
"dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n",
r.facets.getTopChildren(10, "Publish Date").toString());
assertEquals(
"dim=Author path=[] value=3 childCount=2\n Lisa (2)\n Bob (1)\n",
r.facets.getTopChildren(10, "Author").toString());
writer.close(); writer.close();
IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir); IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
} }