- TopChildrenQuery cleanup.
- Added class level jdocs for TopChildrenQuery and ChildrenQuery.

parent 1a1df06411
commit 7ef65688cd
TopChildrenQueryParser.java
@@ -121,7 +121,7 @@ public class TopChildrenQueryParser implements QueryParser {
         query = new XFilteredQuery(query, parseContext.cacheFilter(childDocMapper.typeFilter(), null));
 
         SearchContext searchContext = SearchContext.current();
-        TopChildrenQuery childQuery = new TopChildrenQuery(query, childType, parentType, scoreType, factor, incrementalFactor);
+        TopChildrenQuery childQuery = new TopChildrenQuery(searchContext, query, childType, parentType, scoreType, factor, incrementalFactor);
         searchContext.addRewrite(childQuery);
         return childQuery;
     }
ChildrenQuery.java
@@ -40,9 +40,13 @@ import java.io.IOException;
 import java.util.Set;
 
 /**
- * A query implementation that executes the wrapped child query and
- * connects the matching child docs to the related parent documents
- * using the {@link IdReaderTypeCache}.
+ * A query implementation that executes the wrapped child query and connects all the matching child docs to the related
+ * parent documents using the {@link IdReaderTypeCache}.
+ * <p/>
+ * This query is executed in two rounds. The first round resolves all the matching child documents and groups these
+ * documents by parent uid value. The child scores are also aggregated per parent uid value. During the second round
+ * all parent documents whose uid value was collected in the first round are emitted as hits, including
+ * a score based on the aggregated child scores and the score type.
 */
 public class ChildrenQuery extends Query implements SearchContext.Rewrite {
 
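The two-round flow described in the new ChildrenQuery javadoc can be sketched in plain Java. The ChildHit type and the "max" aggregation below are illustrative assumptions for the sketch, not the actual Lucene collector wiring in ChildrenQuery:

import java.util.HashMap;
import java.util.Map;

// Plain-Java sketch of the two rounds from the ChildrenQuery javadoc.
// Round 1: group matching child hits by parent uid, aggregating their scores.
// Round 2: emit each collected parent uid as a hit with the aggregated score.
public class ParentScoreSketch {

    static class ChildHit {
        final String parentUid;
        final float score;

        ChildHit(String parentUid, float score) {
            this.parentUid = parentUid;
            this.score = score;
        }
    }

    public static void main(String[] args) {
        ChildHit[] childHits = {
                new ChildHit("p1", 1.0f), new ChildHit("p1", 3.0f), new ChildHit("p2", 2.0f)
        };

        // Round 1: aggregate child scores per parent uid (here score type "max").
        Map<String, Float> scorePerParent = new HashMap<String, Float>();
        for (ChildHit hit : childHits) {
            Float current = scorePerParent.get(hit.parentUid);
            scorePerParent.put(hit.parentUid, current == null ? hit.score : Math.max(current, hit.score));
        }

        // Round 2: emit parents with their aggregated scores.
        for (Map.Entry<String, Float> entry : scorePerParent.entrySet()) {
            System.out.println("parent " + entry.getKey() + " scored " + entry.getValue());
        }
    }
}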
TopChildrenQuery.java
@@ -25,41 +25,46 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 import org.elasticsearch.ElasticSearchIllegalStateException;
+import org.elasticsearch.common.CacheRecycler;
 import org.elasticsearch.common.bytes.HashedBytesArray;
 import org.elasticsearch.common.lucene.search.EmptyScorer;
+import org.elasticsearch.common.trove.ExtTHashMap;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
 import java.util.*;
 
 /**
- *
+ * A query that evaluates the top matching child documents (based on the score) in order to determine which
+ * parent documents to return. This query tries to find just enough child documents to return the requested
+ * number of parent documents (or fewer if no other child documents can be found).
+ * <p/>
+ * This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
+ * child documents. The resulting child documents are mapped into their parent documents, including the aggregated child scores.
+ * If not enough parent documents could be resolved, then a subsequent round is executed, requesting the previously requested
+ * number of documents times incremental_factor. This logic repeats until enough parent documents are resolved or until no more
+ * child documents are available.
+ * <p/>
+ * This query is most of the time faster than the {@link ChildrenQuery}. Usually enough parent documents can be returned
+ * in the first child document query round.
 */
 public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
 
-    private Query query;
-
-    private String parentType;
-
-    private String childType;
-
-    private ScoreType scoreType;
-
-    private int factor;
-
-    private int incrementalFactor;
-
-    private Map<Object, ParentDoc[]> parentDocs;
-
-    // Actual value can get lost during query rewriting in dfs phase, but this isn't an issue now.
-    private int numHits = 0;
+    private final SearchContext searchContext;
+    private final Query query;
+    private final String parentType;
+    private final String childType;
+    private final ScoreType scoreType;
+    private final int factor;
+    private final int incrementalFactor;
 
     // Need to know if this query is properly used, otherwise the results are unexpected for example in the count api
-    // Need to use boolean array instead of boolean primitive... b/c during query rewriting in dfs phase
-    private boolean[] properlyInvoked = new boolean[]{false};
+    private boolean properlyInvoked = false;
+    private ExtTHashMap<Object, ParentDoc[]> parentDocs;
 
     // Note, the query is expected to already be filtered to only child type docs
-    public TopChildrenQuery(Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+    public TopChildrenQuery(SearchContext searchContext, Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+        this.searchContext = searchContext;
         this.query = query;
        this.childType = childType;
        this.parentType = parentType;
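To make the factor arithmetic in the new javadoc concrete, here is a worked example; the from/size/factor values are assumptions picked purely for illustration:

// Worked example of the child-document sizing described in the javadoc.
public class SizingExample {
    public static void main(String[] args) {
        int from = 0, size = 10, factor = 5, incrementalFactor = 2;
        int numChildDocs = (from + size) * factor;   // first round asks for 50 child docs
        System.out.println("round 1: " + numChildDocs);
        numChildDocs *= incrementalFactor;           // not enough parents resolved: ask for 100 next
        System.out.println("round 2: " + numChildDocs);
    }
}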
@@ -68,54 +73,61 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         this.incrementalFactor = incrementalFactor;
     }
 
+    private TopChildrenQuery(TopChildrenQuery existing, Query rewrittenChildQuery) {
+        this.searchContext = existing.searchContext;
+        this.query = rewrittenChildQuery;
+        this.parentType = existing.parentType;
+        this.childType = existing.childType;
+        this.scoreType = existing.scoreType;
+        this.factor = existing.factor;
+        this.incrementalFactor = existing.incrementalFactor;
+        this.parentDocs = existing.parentDocs;
+        this.properlyInvoked = existing.properlyInvoked;
+    }
+
     @Override
     public void contextRewrite(SearchContext searchContext) throws Exception {
+        properlyInvoked = true;
+        this.parentDocs = CacheRecycler.popHashMap();
         searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves());
 
-        int numDocs = (searchContext.from() + searchContext.size());
-        if (numDocs == 0) {
-            numDocs = 1;
+        int parentHitsResolved;
+        int numChildDocs = (searchContext.from() + searchContext.size());
+        if (numChildDocs == 0) {
+            numChildDocs = 1;
         }
-        numDocs *= factor;
+        numChildDocs *= factor;
         while (true) {
-            clear();
-            // if (topDocsPhase.scope() != null) {
-            //     searchContext.searcher().processingScope(topDocsPhase.scope());
-            // }
-            TopDocs topDocs = searchContext.searcher().search(query, numDocs);
-            // if (topDocsPhase.scope() != null) {
-            //     we mark the scope as processed, so we don't process it again, even if we need to rerun the query...
-            //     searchContext.searcher().processedScope();
-            // }
-            processResults(topDocs, searchContext);
+            parentDocs.clear();
+            TopDocs topChildDocs = searchContext.searcher().search(query, numChildDocs);
+            parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext);
 
             // check if we found enough docs, if so, break
-            if (numHits >= (searchContext.from() + searchContext.size())) {
+            if (parentHitsResolved >= (searchContext.from() + searchContext.size())) {
                 break;
             }
             // if we did not find enough docs, check if it makes sense to search further
-            if (topDocs.totalHits <= numDocs) {
+            if (topChildDocs.totalHits <= numChildDocs) {
                 break;
             }
             // if not, update numDocs, and search again
-            numDocs *= incrementalFactor;
-            if (numDocs > topDocs.totalHits) {
-                numDocs = topDocs.totalHits;
+            numChildDocs *= incrementalFactor;
+            if (numChildDocs > topChildDocs.totalHits) {
+                numChildDocs = topChildDocs.totalHits;
             }
         }
     }
 
     @Override
     public void contextClear() {
+        if (parentDocs != null) {
+            CacheRecycler.pushHashMap(parentDocs);
+            parentDocs = null;
+        }
     }
 
-    void clear() {
-        properlyInvoked[0] = true;
-        parentDocs = null;
-        numHits = 0;
-    }
-
-    public void processResults(TopDocs topDocs, SearchContext context) {
+    int resolveParentDocuments(TopDocs topDocs, SearchContext context) {
+        int parentHitsResolved = 0;
         Map<Object, TIntObjectHashMap<ParentDoc>> parentDocsPerReader = new HashMap<Object, TIntObjectHashMap<ParentDoc>>();
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
             int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
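The retry loop in contextRewrite above terminates on two conditions: enough parents resolved, or no more child docs left to fetch. A standalone sketch of that loop follows; totalChildHits, resolveParents and the 4-children-per-parent ratio are stand-ins invented for the example, not ES methods:

// Minimal sketch of the contextRewrite retry loop and its two exit conditions.
public class RetryLoopSketch {

    static int totalChildHits = 120;         // pretend the index has 120 matching child docs

    static int resolveParents(int requested) {
        // pretend every 4 child docs map to one distinct parent
        return Math.min(requested, totalChildHits) / 4;
    }

    public static void main(String[] args) {
        int needed = 10, factor = 5, incrementalFactor = 2;
        int numChildDocs = needed * factor;                  // first round: 50 child docs
        while (true) {
            int parentsResolved = resolveParents(numChildDocs);
            if (parentsResolved >= needed) {
                break;                                       // enough parents resolved
            }
            if (totalChildHits <= numChildDocs) {
                break;                                       // no more child docs available
            }
            numChildDocs = Math.min(numChildDocs * incrementalFactor, totalChildHits);
        }
        System.out.println("settled on " + numChildDocs + " child docs");
    }
}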
@@ -144,7 +156,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
 
                 ParentDoc parentDoc = readerParentDocs.get(parentDocId);
                 if (parentDoc == null) {
-                    numHits++; // we have a hit on a parent
+                    parentHitsResolved++; // we have a hit on a parent
                     parentDoc = new ParentDoc();
                     parentDoc.docId = parentDocId;
                     parentDoc.count = 1;
@@ -162,12 +174,13 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
             }
         }
 
-        this.parentDocs = new HashMap<Object, ParentDoc[]>();
         for (Map.Entry<Object, TIntObjectHashMap<ParentDoc>> entry : parentDocsPerReader.entrySet()) {
             ParentDoc[] values = entry.getValue().values(new ParentDoc[entry.getValue().size()]);
             Arrays.sort(values, PARENT_DOC_COMP);
             parentDocs.put(entry.getKey(), values);
         }
+
+        return parentHitsResolved;
     }
 
     private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();
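The loop above buckets the resolved parent hits per reader and sorts each bucket with PARENT_DOC_COMP, which orders by docId so the scorer can walk them in index order. A small sketch of that per-segment sort, using a hypothetical segment key and a pared-down ParentDoc:

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

// Sketch of the per-reader grouping in resolveParentDocuments: parent hits are
// bucketed per segment and sorted by docId before scoring.
public class PerSegmentSortSketch {

    static class ParentDoc {
        final int docId;

        ParentDoc(int docId) {
            this.docId = docId;
        }
    }

    public static void main(String[] args) {
        Map<String, ParentDoc[]> parentDocsPerSegment = new HashMap<String, ParentDoc[]>();
        ParentDoc[] segmentHits = { new ParentDoc(42), new ParentDoc(7), new ParentDoc(19) };
        Arrays.sort(segmentHits, new Comparator<ParentDoc>() {
            public int compare(ParentDoc left, ParentDoc right) {
                return left.docId - right.docId;             // ascending docId, like PARENT_DOC_COMP
            }
        });
        parentDocsPerSegment.put("segment_0", segmentHits);
        // docIds now ascend within the segment: 7, 19, 42
        for (ParentDoc doc : parentDocsPerSegment.get("segment_0")) {
            System.out.println(doc.docId);
        }
    }
}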
@@ -179,7 +192,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         }
     }
 
-    public static class ParentDoc {
+    static class ParentDoc {
         public int docId;
         public int count;
         public float maxScore = Float.NaN;
@@ -188,11 +201,14 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
 
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
-        Query newQ = query.rewrite(reader);
-        if (newQ == query) return this;
-        TopChildrenQuery bq = (TopChildrenQuery) this.clone();
-        bq.query = newQ;
-        return bq;
+        Query rewrittenChildQuery = query.rewrite(reader);
+        if (rewrittenChildQuery == query) {
+            return this;
+        }
+        int index = searchContext.rewrites().indexOf(this);
+        TopChildrenQuery rewrite = new TopChildrenQuery(this, rewrittenChildQuery);
+        searchContext.rewrites().set(index, rewrite);
+        return rewrite;
     }
 
     @Override
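The new rewrite implementation follows Lucene's Query.rewrite contract (return this when nothing changed, otherwise a new instance, which keeps the fields final) and additionally swaps the registered rewrite in the search context so later phases see the rewritten instance. A minimal sketch of that swap pattern, using a plain List in place of SearchContext.rewrites() and an invented RewritableQuery:

import java.util.ArrayList;
import java.util.List;

// Sketch of the "replace yourself in a registry on rewrite" pattern used above.
public class RewriteSwapSketch {

    static class RewritableQuery {
        final String clause;

        RewritableQuery(String clause) {
            this.clause = clause;
        }

        RewritableQuery rewrite(List<RewritableQuery> registry) {
            String rewritten = clause.trim();
            if (rewritten.equals(clause)) {
                return this;                                // nothing changed: keep this instance
            }
            RewritableQuery copy = new RewritableQuery(rewritten);
            registry.set(registry.indexOf(this), copy);     // swap the registered instance
            return copy;
        }
    }

    public static void main(String[] args) {
        List<RewritableQuery> registry = new ArrayList<RewritableQuery>();
        RewritableQuery original = new RewritableQuery("  foo:bar ");
        registry.add(original);
        RewritableQuery rewritten = original.rewrite(registry);
        System.out.println(registry.get(0) == rewritten);   // true: registry holds the rewrite
    }
}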
@@ -202,14 +218,15 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
 
     @Override
     public Weight createWeight(IndexSearcher searcher) throws IOException {
-        if (!properlyInvoked[0]) {
+        if (!properlyInvoked) {
             throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly");
         }
 
         if (parentDocs != null) {
             return new ParentWeight(searcher, query.createWeight(searcher));
-        } else {
-            return query.createWeight(searcher);
         }
+        return query.createWeight(searcher);
     }
 
     public String toString(String field) {