Speed up named queries. #18470

Named queries have a performance bug when they are used with expensive queries that need to perform a lot of work up-front like fuzzy or range queries (including with points). The reason is that they currently re-create the weight and scorer for every hit. Instead we should create weights exactly once and use a single Scorer for all documents that are on the same segment.
2016-05-19 18:38:35 +02:00 · 2016-05-19 18:38:35 +02:00 · cb2cfdd9c0
parent 89a57bce5a
commit cb2cfdd9c0
1 changed files with 53 additions and 38 deletions
--- a/core/src/main/java/org/elasticsearch/search/fetch/matchedqueries/MatchedQueriesFetchSubPhase.java
+++ b/core/src/main/java/org/elasticsearch/search/fetch/matchedqueries/MatchedQueriesFetchSubPhase.java
@ -18,11 +18,14 @@
 */
 package org.elasticsearch.search.fetch.matchedqueries;

+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.internal.InternalSearchHit;
@ -31,6 +34,8 @@ import org.elasticsearch.search.internal.SearchContext.Lifetime;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

@ -48,58 +53,68 @@ public class MatchedQueriesFetchSubPhase implements FetchSubPhase {

    @Override
    public boolean hitsExecutionNeeded(SearchContext context) {
-        return false;
+        return true; // we short-circuit in hitsExecute
    }

    @Override
    public void hitsExecute(SearchContext context, InternalSearchHit[] hits) {
-    }
+        if (hits.length == 0) {
+            return;
+        }
+        hits = hits.clone(); // don't modify the incoming hits
+        Arrays.sort(hits, (a, b) -> Integer.compare(a.docId(), b.docId()));
+        @SuppressWarnings("unchecked")
+        List<String>[] matchedQueries = new List[hits.length];
+        for (int i = 0; i < matchedQueries.length; ++i) {
+            matchedQueries[i] = new ArrayList<>();
+        }

-    @Override
-    public boolean hitExecutionNeeded(SearchContext context) {
-        return !context.parsedQuery().namedFilters().isEmpty()
-                || (context.parsedPostFilter() !=null && !context.parsedPostFilter().namedFilters().isEmpty());
-    }
-
-    @Override
-    public void hitExecute(SearchContext context, HitContext hitContext) {
-        List<String> matchedQueries = new ArrayList<>(2);
+        Map<String, Query> namedQueries = new HashMap<>(context.parsedQuery().namedFilters());
+        if (context.parsedPostFilter() != null) {
+            namedQueries.putAll(context.parsedPostFilter().namedFilters());
+        }

        try {
-            addMatchedQueries(hitContext, context.parsedQuery().namedFilters(), matchedQueries);
-
-            if (context.parsedPostFilter() != null) {
-                addMatchedQueries(hitContext, context.parsedPostFilter().namedFilters(), matchedQueries);
+            for (Map.Entry<String, Query> entry : namedQueries.entrySet()) {
+                String name = entry.getKey();
+                Query query = entry.getValue();
+                int readerIndex = -1;
+                int docBase = -1;
+                Weight weight = context.searcher().createNormalizedWeight(query, false);
+                Bits matchingDocs = null;
+                for (int i = 0; i < hits.length; ++i) {
+                    InternalSearchHit hit = hits[i];
+                    int hitReaderIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves());
+                    if (readerIndex != hitReaderIndex) {
+                        readerIndex = hitReaderIndex;
+                        LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(readerIndex);
+                        docBase = ctx.docBase;
+                        // scorers can be costly to create, so reuse them across docs of the same segment
+                        Scorer scorer = weight.scorer(ctx);
+                        matchingDocs = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), scorer);
+                    }
+                    if (matchingDocs.get(hit.docId() - docBase)) {
+                        matchedQueries[i].add(name);
+                    }
+                }
+            }
+            for (int i = 0; i < hits.length; ++i) {
+                hits[i].matchedQueries(matchedQueries[i].toArray(new String[0]));
            }
        } catch (IOException e) {
            throw ExceptionsHelper.convertToElastic(e);
        } finally {
            SearchContext.current().clearReleasables(Lifetime.COLLECTION);
        }
-
-        hitContext.hit().matchedQueries(matchedQueries.toArray(new String[matchedQueries.size()]));
    }

-    private void addMatchedQueries(HitContext hitContext, Map<String, Query> namedQueries, List<String> matchedQueries) throws IOException {
-        for (Map.Entry<String, Query> entry : namedQueries.entrySet()) {
-            String name = entry.getKey();
-            Query filter = entry.getValue();
+    @Override
+    public boolean hitExecutionNeeded(SearchContext context) {
+        return false;
+    }

-            final Weight weight = hitContext.topLevelSearcher().createNormalizedWeight(filter, false);
-            final Scorer scorer = weight.scorer(hitContext.readerContext());
-            if (scorer == null) {
-                continue;
-            }
-            final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
-            if (twoPhase == null) {
-                if (scorer.iterator().advance(hitContext.docId()) == hitContext.docId()) {
-                    matchedQueries.add(name);
-                }
-            } else {
-                if (twoPhase.approximation().advance(hitContext.docId()) == hitContext.docId() && twoPhase.matches()) {
-                    matchedQueries.add(name);
-                }
-            }
-        }
+    @Override
+    public void hitExecute(SearchContext context, HitContext hitContext) {
+        // we do everything in hitsExecute
    }
 }