Lucene#asSequentialBits gets the leadCost backwards. (#48335) (#48403)

The comment says it needs random-access, but it passes `Long#MAX_VALUE` as the lead cost, which forces sequential access, it should pass `0` instead. I took advantage of this fix to improve the logic to leverage an estimation of the number of times that `Bits#get` gets called to make better decisions.
2019-10-23 17:48:17 +02:00 · 2019-10-23 17:48:17 +02:00 · 81ef72d3ef
parent aaa6209be6
commit 81ef72d3ef
3 changed files with 119 additions and 5 deletions
--- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
+++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@ -816,16 +816,27 @@ public class Lucene {
    }
    /**
-     * Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match
+     * Return a {@link Bits} view of the provided scorer.
-     * all documents contained in the set. Note that the returned {@link Bits}
+     * <b>NOTE</b>: that the returned {@link Bits} instance MUST be consumed in order.
-     * instance MUST be consumed in order.
+     * @see #asSequentialAccessBits(int, ScorerSupplier, long)
     */
    public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier) throws IOException {
        return asSequentialAccessBits(maxDoc, scorerSupplier, 0L);
    }
    /**
     * Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match
     * all documents contained in the set.
     * <b>NOTE</b>: that the returned {@link Bits} instance MUST be consumed in order.
     * @param estimatedGetCount an estimation of the number of times that {@link Bits#get} will get called
     */
    public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier,
            long estimatedGetCount) throws IOException {
        if (scorerSupplier == null) {
            return new Bits.MatchNoBits(maxDoc);
        }
        // Since we want bits, we need random-access
-        final Scorer scorer = scorerSupplier.get(Long.MAX_VALUE); // this never returns null
+        final Scorer scorer = scorerSupplier.get(estimatedGetCount); // this never returns null
        final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
        final DocIdSetIterator iterator;
        if (twoPhase == null) {
--- a/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java
+++ b/server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java
@ -269,6 +269,7 @@ public class FunctionScoreQuery extends Query {
            if (subQueryScorer == null) {
                return null;
            }
            final long leadCost = subQueryScorer.iterator().cost();
            final LeafScoreFunction[] leafFunctions = new LeafScoreFunction[functions.length];
            final Bits[] docSets = new Bits[functions.length];
            for (int i = 0; i < functions.length; i++) {
@ -276,7 +277,7 @@ public class FunctionScoreQuery extends Query {
                leafFunctions[i] = function.getLeafScoreFunction(context);
                if (filterWeights[i] != null) {
                    ScorerSupplier filterScorerSupplier = filterWeights[i].scorerSupplier(context);
-                    docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier);
+                    docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier, leadCost);
                } else {
                    docSets[i] = new Bits.MatchAllBits(context.reader().maxDoc());
                }
--- a/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java
+++ b/server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java
@ -24,9 +24,11 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
@ -36,10 +38,15 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.ScorerSupplier;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
 import org.apache.lucene.search.SortedSetSelector;
@ -420,6 +427,101 @@ public class LuceneTests extends ESTestCase {
        dir.close();
    }
    private static class UnsupportedQuery extends Query {
        @Override
        public String toString(String field) {
            return "Unsupported";
        }
        @Override
        public boolean equals(Object obj) {
            return obj instanceof UnsupportedQuery;
        }
        @Override
        public int hashCode() {
            return 42;
        }
        @Override
        public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
            return new Weight(this) {
                @Override
                public boolean isCacheable(LeafReaderContext ctx) {
                    return true;
                }
                @Override
                public void extractTerms(Set<Term> terms) {
                    throw new UnsupportedOperationException();
                }
                @Override
                public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                    throw new UnsupportedOperationException();
                }
                @Override
                public Scorer scorer(LeafReaderContext context) throws IOException {
                    throw new UnsupportedOperationException();
                }
                @Override
                public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
                    return new ScorerSupplier() {
                        @Override
                        public Scorer get(long leadCost) throws IOException {
                            throw new UnsupportedOperationException();
                        }
                        @Override
                        public long cost() {
                            return context.reader().maxDoc();
                        }
                    };
                }
            };
        }
    }
    public void testAsSequentialBitsUsesRandomAccess() throws IOException {
        try (Directory dir = newDirectory()) {
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()))) {
                Document doc = new Document();
                doc.add(new NumericDocValuesField("foo", 5L));
                // we need more than 8 documents because doc values are artificially penalized by IndexOrDocValuesQuery
                for (int i = 0; i < 10; ++i) {
                    w.addDocument(doc);
                }
                w.forceMerge(1);
                try (IndexReader reader = DirectoryReader.open(w)) {
                    IndexSearcher searcher = newSearcher(reader);
                    searcher.setQueryCache(null);
                    Query query = new IndexOrDocValuesQuery(
                            new UnsupportedQuery(), NumericDocValuesField.newSlowRangeQuery("foo", 3L, 5L));
                    Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f);
                    // Random access by default
                    ScorerSupplier scorerSupplier = weight.scorerSupplier(reader.leaves().get(0));
                    Bits bits = Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier);
                    assertNotNull(bits);
                    assertTrue(bits.get(0));
                    // Moves to sequential access if Bits#get is called more than the number of matches
                    ScorerSupplier scorerSupplier2 = weight.scorerSupplier(reader.leaves().get(0));
                    expectThrows(UnsupportedOperationException.class,
                            () -> Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier2, reader.maxDoc()));
                }
            }
        }
    }
    /**
     * Test that the "unmap hack" is detected as supported by lucene.
     * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038