From 81ef72d3efbe8a2be1c045179357c7a625b3c1b4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 23 Oct 2019 17:48:17 +0200 Subject: [PATCH] Lucene#asSequentialBits gets the leadCost backwards. (#48335) (#48403) The comment says it needs random-access, but it passes `Long#MAX_VALUE` as the lead cost, which forces sequential access, it should pass `0` instead. I took advantage of this fix to improve the logic to leverage an estimation of the number of times that `Bits#get` gets called to make better decisions. --- .../elasticsearch/common/lucene/Lucene.java | 19 +++- .../search/function/FunctionScoreQuery.java | 3 +- .../common/lucene/LuceneTests.java | 102 ++++++++++++++++++ 3 files changed, 119 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 2fdfaf37f33..f1d92a776c8 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -816,16 +816,27 @@ public class Lucene { } /** - * Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match - * all documents contained in the set. Note that the returned {@link Bits} - * instance MUST be consumed in order. + * Return a {@link Bits} view of the provided scorer. + * NOTE: that the returned {@link Bits} instance MUST be consumed in order. + * @see #asSequentialAccessBits(int, ScorerSupplier, long) */ public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier) throws IOException { + return asSequentialAccessBits(maxDoc, scorerSupplier, 0L); + } + + /** + * Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match + * all documents contained in the set. + * NOTE: that the returned {@link Bits} instance MUST be consumed in order. + * @param estimatedGetCount an estimation of the number of times that {@link Bits#get} will get called + */ + public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier, + long estimatedGetCount) throws IOException { if (scorerSupplier == null) { return new Bits.MatchNoBits(maxDoc); } // Since we want bits, we need random-access - final Scorer scorer = scorerSupplier.get(Long.MAX_VALUE); // this never returns null + final Scorer scorer = scorerSupplier.get(estimatedGetCount); // this never returns null final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator(); final DocIdSetIterator iterator; if (twoPhase == null) { diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java index 2592c645111..d896a752f6f 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java @@ -269,6 +269,7 @@ public class FunctionScoreQuery extends Query { if (subQueryScorer == null) { return null; } + final long leadCost = subQueryScorer.iterator().cost(); final LeafScoreFunction[] leafFunctions = new LeafScoreFunction[functions.length]; final Bits[] docSets = new Bits[functions.length]; for (int i = 0; i < functions.length; i++) { @@ -276,7 +277,7 @@ public class FunctionScoreQuery extends Query { leafFunctions[i] = function.getLeafScoreFunction(context); if (filterWeights[i] != null) { ScorerSupplier filterScorerSupplier = filterWeights[i].scorerSupplier(context); - docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier); + docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier, leadCost); } else { docSets[i] = new Bits.MatchAllBits(context.reader().maxDoc()); } diff --git a/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java b/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java index 1891be362b8..a716350638d 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java @@ -24,9 +24,11 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.LatLonDocValuesField; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; @@ -36,10 +38,15 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.SoftDeletesRetentionMergePolicy; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.SortedSetSelector; @@ -420,6 +427,101 @@ public class LuceneTests extends ESTestCase { dir.close(); } + private static class UnsupportedQuery extends Query { + + @Override + public String toString(String field) { + return "Unsupported"; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof UnsupportedQuery; + } + + @Override + public int hashCode() { + return 42; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new Weight(this) { + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return true; + } + + @Override + public void extractTerms(Set terms) { + throw new UnsupportedOperationException(); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + return new ScorerSupplier() { + + @Override + public Scorer get(long leadCost) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return context.reader().maxDoc(); + } + + }; + } + + }; + } + + } + + public void testAsSequentialBitsUsesRandomAccess() throws IOException { + try (Directory dir = newDirectory()) { + try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()))) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("foo", 5L)); + // we need more than 8 documents because doc values are artificially penalized by IndexOrDocValuesQuery + for (int i = 0; i < 10; ++i) { + w.addDocument(doc); + } + w.forceMerge(1); + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = newSearcher(reader); + searcher.setQueryCache(null); + Query query = new IndexOrDocValuesQuery( + new UnsupportedQuery(), NumericDocValuesField.newSlowRangeQuery("foo", 3L, 5L)); + Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f); + + // Random access by default + ScorerSupplier scorerSupplier = weight.scorerSupplier(reader.leaves().get(0)); + Bits bits = Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier); + assertNotNull(bits); + assertTrue(bits.get(0)); + + // Moves to sequential access if Bits#get is called more than the number of matches + ScorerSupplier scorerSupplier2 = weight.scorerSupplier(reader.leaves().get(0)); + expectThrows(UnsupportedOperationException.class, + () -> Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier2, reader.maxDoc())); + } + } + } + } + /** * Test that the "unmap hack" is detected as supported by lucene. * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038