- TopChildrenQuery cleanup.

- Added class-level javadocs for TopChildrenQuery and ChildrenQuery.
Martijn van Groningen 2013-01-31 23:38:09 +01:00
parent 1a1df06411
commit 7ef65688cd
3 changed files with 81 additions and 60 deletions

TopChildrenQueryParser.java

@@ -121,7 +121,7 @@ public class TopChildrenQueryParser implements QueryParser {
         query = new XFilteredQuery(query, parseContext.cacheFilter(childDocMapper.typeFilter(), null));
         SearchContext searchContext = SearchContext.current();
-        TopChildrenQuery childQuery = new TopChildrenQuery(query, childType, parentType, scoreType, factor, incrementalFactor);
+        TopChildrenQuery childQuery = new TopChildrenQuery(searchContext, query, childType, parentType, scoreType, factor, incrementalFactor);
         searchContext.addRewrite(childQuery);
         return childQuery;
     }
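
The change above threads the SearchContext into the constructor, while searchContext.addRewrite(childQuery) keeps registering the query for the context rewrite phase. As a rough sketch of the lifecycle this opts into (the interface and driver below are hypothetical stand-ins assumed from the methods visible in this diff, not the actual Elasticsearch search-phase code): contextRewrite runs before weights are created, and contextClear runs when the context is released.

```java
// Hypothetical sketch of the SearchContext.Rewrite lifecycle; names and the
// driver are illustrative only, assumed from the methods visible in this diff.
interface ContextRewrite {
    void contextRewrite() throws Exception; // pre-compute state before the query phase
    void contextClear();                    // release that state afterwards
}

final class RewriteLifecycleSketch {
    static void executeSearch(Iterable<ContextRewrite> rewrites, Runnable queryPhase) throws Exception {
        for (ContextRewrite rewrite : rewrites) {
            rewrite.contextRewrite();       // e.g. TopChildrenQuery resolves parent docs here
        }
        try {
            queryPhase.run();               // createWeight() can rely on the pre-computed state
        } finally {
            for (ContextRewrite rewrite : rewrites) {
                rewrite.contextClear();     // e.g. recycled maps are pushed back to the cache
            }
        }
    }
}
```

Passing the SearchContext through the constructor is also what lets rewrite(IndexReader) replace this query with its rewritten copy in the context's rewrite list (see TopChildrenQuery.java below).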

ChildrenQuery.java

@@ -40,9 +40,13 @@ import java.io.IOException;
 import java.util.Set;

 /**
- * A query implementation that executes the wrapped child query and
- * connects the matching child docs to the related parent documents
- * using the {@link IdReaderTypeCache}.
+ * A query implementation that executes the wrapped child query and connects all the matching child docs to the related
+ * parent documents using the {@link IdReaderTypeCache}.
+ * <p/>
+ * This query is executed in two rounds. The first round resolves all the matching child documents and groups these
+ * documents by parent uid value; the child scores are also aggregated per parent uid value. In the second round,
+ * all parent documents whose uid values were collected in the first round are emitted as hits, each with a score
+ * based on the aggregated child scores and the configured score type.
  */
 public class ChildrenQuery extends Query implements SearchContext.Rewrite {
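
The two-round scheme the new javadoc describes can be pictured with plain collections. A minimal sketch, assuming a hypothetical ChildHit type and using max as the stand-in score type (the real implementation works against Lucene readers and the IdReaderTypeCache, not maps):

```java
import java.util.HashMap;
import java.util.Map;

final class TwoRoundSketch {
    // Hypothetical stand-in for a scored child document pointing at its parent uid.
    record ChildHit(String parentUid, float score) {}

    // Round 1: group matching child docs by parent uid, aggregating scores per parent.
    static Map<String, Float> collectChildScores(Iterable<ChildHit> childHits) {
        Map<String, Float> scoresPerParent = new HashMap<>();
        for (ChildHit hit : childHits) {
            // Math::max stands in for the configured score type (max, sum, avg, ...).
            scoresPerParent.merge(hit.parentUid(), hit.score(), Math::max);
        }
        return scoresPerParent;
    }

    // Round 2: a parent doc is emitted as a hit iff its uid was collected in round 1,
    // scored with the aggregated child score.
    static Float parentScore(Map<String, Float> scoresPerParent, String parentUid) {
        return scoresPerParent.get(parentUid); // null: no matching children, not a hit
    }
}
```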

TopChildrenQuery.java

@@ -25,41 +25,46 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 import org.elasticsearch.ElasticSearchIllegalStateException;
+import org.elasticsearch.common.CacheRecycler;
 import org.elasticsearch.common.bytes.HashedBytesArray;
 import org.elasticsearch.common.lucene.search.EmptyScorer;
+import org.elasticsearch.common.trove.ExtTHashMap;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
 import java.util.*;

 /**
- *
+ * A query that evaluates the top matching child documents (based on the score) in order to determine which
+ * parent documents to return. This query tries to find just enough child documents to return the requested
+ * number of parent documents (or fewer if no more child documents can be found).
+ * <p/>
+ * This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
+ * child documents. The resulting child documents are mapped to their parent documents, including the aggregated child scores.
+ * If not enough parent documents can be resolved, a subsequent round is executed that requests the previously requested
+ * number of documents multiplied by incremental_factor. This repeats until enough parent documents are resolved or until
+ * no more child documents are available.
+ * <p/>
+ * This query is usually faster than the {@link ChildrenQuery}, because enough parent documents can typically be returned
+ * in the first child document query round.
  */
 public class TopChildrenQuery extends Query implements SearchContext.Rewrite {

-    private Query query;
-    private String parentType;
-    private String childType;
-    private ScoreType scoreType;
-    private int factor;
-    private int incrementalFactor;
-    private Map<Object, ParentDoc[]> parentDocs;
-
-    // Actual value can get lost during query rewriting in dfs phase, but this isn't an issue now.
-    private int numHits = 0;
+    private final SearchContext searchContext;
+    private final Query query;
+    private final String parentType;
+    private final String childType;
+    private final ScoreType scoreType;
+    private final int factor;
+    private final int incrementalFactor;

     // Need to know if this query is properly used, otherwise the results are unexpected for example in the count api
-    // Need to use boolean array instead of boolean primitive... b/c during query rewriting in dfs phase
-    private boolean[] properlyInvoked = new boolean[]{false};
+    private boolean properlyInvoked = false;
+
+    private ExtTHashMap<Object, ParentDoc[]> parentDocs;

     // Note, the query is expected to already be filtered to only child type docs
-    public TopChildrenQuery(Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+    public TopChildrenQuery(SearchContext searchContext, Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+        this.searchContext = searchContext;
         this.query = query;
         this.childType = childType;
         this.parentType = parentType;
@@ -68,54 +73,61 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         this.incrementalFactor = incrementalFactor;
     }

+    private TopChildrenQuery(TopChildrenQuery existing, Query rewrittenChildQuery) {
+        this.searchContext = existing.searchContext;
+        this.query = rewrittenChildQuery;
+        this.parentType = existing.parentType;
+        this.childType = existing.childType;
+        this.scoreType = existing.scoreType;
+        this.factor = existing.factor;
+        this.incrementalFactor = existing.incrementalFactor;
+        this.parentDocs = existing.parentDocs;
+        this.properlyInvoked = existing.properlyInvoked;
+    }
+
     @Override
     public void contextRewrite(SearchContext searchContext) throws Exception {
+        properlyInvoked = true;
+        this.parentDocs = CacheRecycler.popHashMap();
         searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves());
-        int numDocs = (searchContext.from() + searchContext.size());
-        if (numDocs == 0) {
-            numDocs = 1;
+
+        int parentHitsResolved;
+        int numChildDocs = (searchContext.from() + searchContext.size());
+        if (numChildDocs == 0) {
+            numChildDocs = 1;
         }
-        numDocs *= factor;
+        numChildDocs *= factor;
         while (true) {
-            clear();
-//            if (topDocsPhase.scope() != null) {
-//                searchContext.searcher().processingScope(topDocsPhase.scope());
-//            }
-            TopDocs topDocs = searchContext.searcher().search(query, numDocs);
-//            if (topDocsPhase.scope() != null) {
-//                // we mark the scope as processed, so we don't process it again, even if we need to rerun the query...
-//                searchContext.searcher().processedScope();
-//            }
-            processResults(topDocs, searchContext);
+            parentDocs.clear();
+            TopDocs topChildDocs = searchContext.searcher().search(query, numChildDocs);
+            parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext);

             // check if we found enough docs, if so, break
-            if (numHits >= (searchContext.from() + searchContext.size())) {
+            if (parentHitsResolved >= (searchContext.from() + searchContext.size())) {
                 break;
             }
             // if we did not find enough docs, check if it makes sense to search further
-            if (topDocs.totalHits <= numDocs) {
+            if (topChildDocs.totalHits <= numChildDocs) {
                 break;
             }
             // if not, update numDocs, and search again
-            numDocs *= incrementalFactor;
-            if (numDocs > topDocs.totalHits) {
-                numDocs = topDocs.totalHits;
+            numChildDocs *= incrementalFactor;
+            if (numChildDocs > topChildDocs.totalHits) {
+                numChildDocs = topChildDocs.totalHits;
             }
         }
     }

     @Override
     public void contextClear() {
+        if (parentDocs != null) {
+            CacheRecycler.pushHashMap(parentDocs);
+            parentDocs = null;
+        }
     }

-    void clear() {
-        properlyInvoked[0] = true;
-        parentDocs = null;
-        numHits = 0;
-    }
-
-    public void processResults(TopDocs topDocs, SearchContext context) {
+    int resolveParentDocuments(TopDocs topDocs, SearchContext context) {
+        int parentHitsResolved = 0;
         Map<Object, TIntObjectHashMap<ParentDoc>> parentDocsPerReader = new HashMap<Object, TIntObjectHashMap<ParentDoc>>();
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
             int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
@@ -144,7 +156,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
                 ParentDoc parentDoc = readerParentDocs.get(parentDocId);
                 if (parentDoc == null) {
-                    numHits++; // we have a hit on a parent
+                    parentHitsResolved++; // we have a hit on a parent
                     parentDoc = new ParentDoc();
                     parentDoc.docId = parentDocId;
                     parentDoc.count = 1;
@@ -162,12 +174,13 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
             }
         }

-        this.parentDocs = new HashMap<Object, ParentDoc[]>();
         for (Map.Entry<Object, TIntObjectHashMap<ParentDoc>> entry : parentDocsPerReader.entrySet()) {
             ParentDoc[] values = entry.getValue().values(new ParentDoc[entry.getValue().size()]);
             Arrays.sort(values, PARENT_DOC_COMP);
             parentDocs.put(entry.getKey(), values);
         }
+
+        return parentHitsResolved;
     }

     private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();
@@ -179,7 +192,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         }
     }

-    public static class ParentDoc {
+    static class ParentDoc {
         public int docId;
         public int count;
         public float maxScore = Float.NaN;
@@ -188,11 +201,14 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {

     @Override
     public Query rewrite(IndexReader reader) throws IOException {
-        Query newQ = query.rewrite(reader);
-        if (newQ == query) return this;
-        TopChildrenQuery bq = (TopChildrenQuery) this.clone();
-        bq.query = newQ;
-        return bq;
+        Query rewrittenChildQuery = query.rewrite(reader);
+        if (rewrittenChildQuery == query) {
+            return this;
+        }
+        int index = searchContext.rewrites().indexOf(this);
+        TopChildrenQuery rewrite = new TopChildrenQuery(this, rewrittenChildQuery);
+        searchContext.rewrites().set(index, rewrite);
+        return rewrite;
     }

     @Override
@@ -202,14 +218,15 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
     @Override
     public Weight createWeight(IndexSearcher searcher) throws IOException {
-        if (!properlyInvoked[0]) {
+        if (!properlyInvoked) {
            throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly");
        }

        if (parentDocs != null) {
            return new ParentWeight(searcher, query.createWeight(searcher));
-        } else {
-            return query.createWeight(searcher);
        }
+        return query.createWeight(searcher);
    }

    public String toString(String field) {
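
Taken together, contextRewrite and resolveParentDocuments implement a widening search over child documents. A standalone sketch of just that control flow, with a hypothetical ChildSearcher standing in for the Lucene search plus the parent resolution done above:

```java
// Sketch of the widening loop in contextRewrite; ChildSearcher is a hypothetical
// stand-in for searchContext.searcher().search(...) plus resolveParentDocuments(...).
final class WideningLoopSketch {
    interface ChildSearcher {
        int totalChildHits();                 // analogous to TopDocs.totalHits
        int resolveParents(int numChildDocs); // parents resolved from the top child hits
    }

    static void run(ChildSearcher searcher, int from, int size, int factor, int incrementalFactor) {
        int numChildDocs = Math.max(from + size, 1) * factor; // first round: (from + size) * factor
        while (true) {
            int parentHitsResolved = searcher.resolveParents(numChildDocs);
            if (parentHitsResolved >= from + size) {
                break; // enough parent documents resolved
            }
            if (searcher.totalChildHits() <= numChildDocs) {
                break; // every matching child doc has already been considered
            }
            // widen the request, but never past the total number of matching child docs
            numChildDocs = Math.min(numChildDocs * incrementalFactor, searcher.totalChildHits());
        }
    }
}
```

With from = 0, size = 10, factor = 5, and incrementalFactor = 2, the rounds request 50, then 100, then 200 child documents until at least 10 parent documents are resolved or the matching child documents are exhausted.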