LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached (#261)
commit dbf7e1865f
parent f1fdd2465c
@@ -470,6 +470,8 @@ Bug Fixes

 * LUCENE-10008: Respect ignoreCase in CommonGramsFilterFactory (Vigya Sharma)

+* LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached. (Greg Miller, Zachary Chen)

 Other
 ---------------------

 (No changes)
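
For context, drill-sideways faceting is typically used as in the short sketch below (the "Color" dimension, values, and topN are illustrative, not taken from this commit). Before this fix, the "sideways" facet counts returned by such a search could include deleted documents whenever the searcher's query cache decided to cache the internal DrillSidewaysQuery.

    import org.apache.lucene.facet.DrillDownQuery;
    import org.apache.lucene.facet.DrillSideways;
    import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
    import org.apache.lucene.facet.FacetsConfig;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.search.IndexSearcher;

    class DrillSidewaysUsageSketch {
      // Minimal drill-sideways usage sketch; dims and values are illustrative only.
      static DrillSidewaysResult searchBlue(
          IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) throws Exception {
        // Drill down on Color=Blue; DrillSideways additionally computes "sideways"
        // counts for the Color dimension as if that drill-down were not applied.
        DrillDownQuery ddq = new DrillDownQuery(config);
        ddq.add("Color", "Blue");
        DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
        return ds.search(ddq, 10); // top-10 hits plus facets over all dims
      }
    }
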
@@ -34,14 +34,12 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.FieldDoc;
-import org.apache.lucene.search.FilterCollector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.MultiCollectorManager;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopFieldCollector;
@@ -239,17 +237,7 @@ public class DrillSideways {
             drillSidewaysFacetsCollectorManagers,
             drillDownQueries,
             scoreSubDocsAtOnce());
-    if (hitCollector.scoreMode().needsScores() == false) {
-      // this is a horrible hack in order to make sure IndexSearcher will not
-      // attempt to cache the DrillSidewaysQuery
-      hitCollector =
-          new FilterCollector(hitCollector) {
-            @Override
-            public ScoreMode scoreMode() {
-              return ScoreMode.COMPLETE;
-            }
-          };
-    }

     searcher.search(dsq, hitCollector);

     FacetsCollector drillDownCollector;
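
The removed block above worked by lying about the score mode: IndexSearcher only wraps a Weight with its query cache when the collector reports that it does not need scores, so forcing ScoreMode.COMPLETE kept DrillSidewaysQuery out of the cache. Below is a simplified sketch of that decision (not the actual IndexSearcher source; the cache and policy parameters are stand-ins for the searcher's configured ones):

    import java.io.IOException;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.QueryCache;
    import org.apache.lucene.search.QueryCachingPolicy;
    import org.apache.lucene.search.ScoreMode;
    import org.apache.lucene.search.Weight;

    class CachingDecisionSketch {
      // Roughly what happens before a query is executed: the cache is only consulted
      // when scores are not needed. The old FilterCollector hack forced
      // needsScores() == true, so this branch was never taken for DrillSidewaysQuery.
      static Weight weightFor(
          IndexSearcher searcher,
          Query query,
          ScoreMode scoreMode,
          QueryCache cache,          // stand-in for the searcher's configured query cache
          QueryCachingPolicy policy) // stand-in for the searcher's caching policy
          throws IOException {
        Weight weight = searcher.rewrite(query).createWeight(searcher, scoreMode, 1f);
        if (scoreMode.needsScores() == false && cache != null) {
          // The caching wrapper may later feed a cached match set to the collector,
          // bypassing the "normal" BulkScorer path that DrillSidewaysQuery relies on.
          weight = cache.doCache(weight, policy);
        }
        return weight;
      }
    }
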
@@ -160,15 +160,14 @@ class DrillSidewaysQuery extends Query {

   @Override
   public boolean isCacheable(LeafReaderContext ctx) {
-    if (baseWeight.isCacheable(ctx) == false) {
-      return false;
-    }
-    for (Weight w : drillDowns) {
-      if (w.isCacheable(ctx) == false) {
-        return false;
-      }
-    }
-    return true;
+    // We can never cache DSQ instances. It's critical that the BulkScorer produced by this
+    // Weight runs through the "normal" execution path so that it has access to an
+    // "acceptDocs" instance that accurately reflects deleted docs. During caching,
+    // "acceptDocs" is null so that caching over-matches (since the final BulkScorer would
+    // account for deleted docs). The problem is that this BulkScorer has a side-effect of
+    // populating the "sideways" FacetsCollectors, so it will use deleted docs in its
+    // sideways counting if caching kicks in. See LUCENE-10060:
+    return false;
   }

   @Override
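
Returning false from isCacheable() is sufficient because the query cache checks that method per segment before storing anything; a Weight that always declines is simply executed through the regular path. The hypothetical wrapper below (not LRUQueryCache's real implementation) illustrates that contract:

    import java.io.IOException;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.FilterWeight;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.Weight;

    // Hypothetical caching wrapper sketch: it may only cache per-segment match sets for
    // weights that report isCacheable(ctx) == true. Since DrillSidewaysQuery's Weight now
    // always reports false, a wrapper like this always falls through to the live scorer,
    // which sees deletions via the normal "acceptDocs" path.
    class CacheAwareWeightSketch extends FilterWeight {
      CacheAwareWeightSketch(Weight in) {
        super(in);
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        if (in.isCacheable(context)) {
          // a real cache would look up or build a reusable per-segment DocIdSet here
        }
        // fall back to the uncached scorer for non-cacheable weights
        return in.scorer(context);
      }
    }
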
@@ -61,6 +61,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryCachingPolicy;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreDoc;
@@ -134,6 +135,124 @@ public class TestDrillSideways extends FacetTestCase {
     return newSearcher(reader, true, false, random().nextBoolean());
   }

+  // See LUCENE-10060:
+  public void testNoCaching() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+    DirectoryTaxonomyWriter taxoWriter =
+        new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    FacetsConfig config = new FacetsConfig();
+
+    // Setup some basic test documents:
+    Document doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("1")));
+    doc.add(new FacetField("Color", "Red"));
+    doc.add(new FacetField("Size", "Small"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    doc = new Document();
+    doc.add(new StringField("id", "2", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("2")));
+    doc.add(new FacetField("Color", "Green"));
+    doc.add(new FacetField("Size", "Small"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    doc = new Document();
+    doc.add(new StringField("id", "3", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("3")));
+    doc.add(new FacetField("Color", "Blue"));
+    doc.add(new FacetField("Size", "Small"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    doc = new Document();
+    doc.add(new StringField("id", "4", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("4")));
+    doc.add(new FacetField("Color", "Red"));
+    doc.add(new FacetField("Size", "Medium"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    doc = new Document();
+    doc.add(new StringField("id", "5", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("5")));
+    doc.add(new FacetField("Color", "Blue"));
+    doc.add(new FacetField("Size", "Medium"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    doc = new Document();
+    doc.add(new StringField("id", "6", Field.Store.NO));
+    doc.add(new SortedDocValuesField("id", new BytesRef("6")));
+    doc.add(new FacetField("Color", "Blue"));
+    doc.add(new FacetField("Size", "Large"));
+    writer.addDocument(config.build(taxoWriter, doc));
+
+    // Delete a couple documents. We'll want to make sure they don't get counted in binning:
+    writer.deleteDocuments(new Term("id", "4"));
+    writer.deleteDocuments(new Term("id", "6"));
+
+    // Simple DDQ that just filters all results by Color == Blue:
+    DrillDownQuery ddq = new DrillDownQuery(config);
+    ddq.add("Color", "Blue");
+
+    // Setup an IndexSearcher that will try to cache queries aggressively:
+    IndexSearcher searcher = getNewSearcher(writer.getReader());
+    searcher.setQueryCachingPolicy(
+        new QueryCachingPolicy() {
+          @Override
+          public void onUse(Query query) {}
+
+          @Override
+          public boolean shouldCache(Query query) {
+            return true;
+          }
+        });
+
+    // Setup a DS instance for searching:
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
+
+    // We'll use a CollectorManager to trigger the trickiest caching behavior:
+    SimpleCollectorManager collectorManager =
+        new SimpleCollectorManager(10, Comparator.comparing(cr -> cr.id));
+    // Make sure our CM produces Collectors that _do not_ need scores to ensure IndexSearcher tries
+    // to cache:
+    assertFalse(collectorManager.newCollector().scoreMode().needsScores());
+    // If we incorrectly cache here, the "sideways" FacetsCollectors will get populated with counts
+    // for the deleted docs. Make sure they don't:
+    DrillSideways.ConcurrentDrillSidewaysResult<List<DocAndScore>> concurrentResult =
+        ds.search(ddq, collectorManager);
+    assertEquals(2, concurrentResult.collectorResult.size());
+    assertEquals(
+        "dim=Color path=[] value=4 childCount=3\n Blue (2)\n Red (1)\n Green (1)\n",
+        concurrentResult.facets.getTopChildren(10, "Color").toString());
+    assertEquals(
+        "dim=Size path=[] value=2 childCount=2\n Small (1)\n Medium (1)\n",
+        concurrentResult.facets.getTopChildren(10, "Size").toString());
+
+    // Now do the same thing but use a Collector directly:
+    SimpleCollector collector = new SimpleCollector();
+    // Make sure our Collector _does not_ need scores to ensure IndexSearcher tries to cache:
+    assertFalse(collector.scoreMode().needsScores());
+    // If we incorrectly cache here, the "sideways" FacetsCollectors will get populated with counts
+    // for the deleted docs. Make sure they don't:
+    DrillSidewaysResult result = ds.search(ddq, collector);
+    assertEquals(2, collector.hits.size());
+    assertEquals(
+        "dim=Color path=[] value=4 childCount=3\n Blue (2)\n Red (1)\n Green (1)\n",
+        result.facets.getTopChildren(10, "Color").toString());
+    assertEquals(
+        "dim=Size path=[] value=2 childCount=2\n Small (1)\n Medium (1)\n",
+        result.facets.getTopChildren(10, "Size").toString());
+
+    writer.close();
+    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
+  }
+
   public void testBasic() throws Exception {
     Directory dir = newDirectory();
     Directory taxoDir = newDirectory();
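
Outside the test, the aggressive setup it simulates corresponds to an IndexSearcher configured with a query cache and caching policy; a hedged sketch of such a configuration is below (cache sizes are arbitrary example values). With this fix, drill-sideways facet counts stay correct over indexes with deletions regardless of what the cache decides to keep.

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.LRUQueryCache;
    import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;

    class CachedSearcherSetupSketch {
      // Typical caching setup; the max entry count and RAM budget are illustrative.
      static IndexSearcher newCachingSearcher(IndexReader reader) {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(new LRUQueryCache(1_000, 64 * 1024 * 1024));
        searcher.setQueryCachingPolicy(new UsageTrackingQueryCachingPolicy());
        return searcher;
      }
    }
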
@@ -1304,7 +1423,7 @@ public class TestDrillSideways extends FacetTestCase {

     @Override
     public ScoreMode scoreMode() {
-      return ScoreMode.COMPLETE;
+      return ScoreMode.COMPLETE_NO_SCORES;
     }
   }