- TopChildrenQuery cleanup.

- Added class-level javadocs for TopChildrenQuery and ChildrenQuery.
Martijn van Groningen 2013-01-31 23:38:09 +01:00
parent 1a1df06411
commit 7ef65688cd
3 changed files with 81 additions and 60 deletions

TopChildrenQueryParser.java

@@ -121,7 +121,7 @@ public class TopChildrenQueryParser implements QueryParser {
         query = new XFilteredQuery(query, parseContext.cacheFilter(childDocMapper.typeFilter(), null));
         SearchContext searchContext = SearchContext.current();
-        TopChildrenQuery childQuery = new TopChildrenQuery(query, childType, parentType, scoreType, factor, incrementalFactor);
+        TopChildrenQuery childQuery = new TopChildrenQuery(searchContext, query, childType, parentType, scoreType, factor, incrementalFactor);
         searchContext.addRewrite(childQuery);
         return childQuery;
     }
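
The change above threads the SearchContext into the constructor, while searchContext.addRewrite(childQuery) keeps registering the query for the context rewrite phase. As a rough sketch of the lifecycle this opts into (the interface and driver below are hypothetical stand-ins assumed from the methods visible in this diff, not the actual Elasticsearch search-phase code): contextRewrite runs before weights are created, and contextClear runs when the context is released.

```java
// Hypothetical sketch of the SearchContext.Rewrite lifecycle; names and the
// driver are illustrative only, assumed from the methods visible in this diff.
interface ContextRewrite {
    void contextRewrite() throws Exception; // pre-compute state before the query phase
    void contextClear();                    // release that state afterwards
}

final class RewriteLifecycleSketch {
    static void executeSearch(Iterable<ContextRewrite> rewrites, Runnable queryPhase) throws Exception {
        for (ContextRewrite rewrite : rewrites) {
            rewrite.contextRewrite();       // e.g. TopChildrenQuery resolves parent docs here
        }
        try {
            queryPhase.run();               // createWeight() can rely on the pre-computed state
        } finally {
            for (ContextRewrite rewrite : rewrites) {
                rewrite.contextClear();     // e.g. recycled maps are pushed back to the cache
            }
        }
    }
}
```

Passing the SearchContext through the constructor is also what lets rewrite(IndexReader) replace this query with its rewritten copy in the context's rewrite list (see TopChildrenQuery.java below).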

ChildrenQuery.java

@@ -40,9 +40,13 @@ import java.io.IOException;
 import java.util.Set;

 /**
- * A query implementation that executes the wrapped child query and
- * connects the matching child docs to the related parent documents
- * using the {@link IdReaderTypeCache}.
+ * A query implementation that executes the wrapped child query and connects all the matching child docs to the related
+ * parent documents using the {@link IdReaderTypeCache}.
+ * <p/>
+ * This query is executed in two rounds. The first round resolves all the matching child documents and groups these
+ * documents by parent uid value; the child scores are also aggregated per parent uid value. In the second round,
+ * all parent documents whose uid values were collected in the first round are emitted as hits, each with a score
+ * based on the aggregated child scores and the configured score type.
  */
 public class ChildrenQuery extends Query implements SearchContext.Rewrite {
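
The two-round scheme the new javadoc describes can be pictured with plain collections. A minimal sketch, assuming a hypothetical ChildHit type and using max as the stand-in score type (the real implementation works against Lucene readers and the IdReaderTypeCache, not maps):

```java
import java.util.HashMap;
import java.util.Map;

final class TwoRoundSketch {
    // Hypothetical stand-in for a scored child document pointing at its parent uid.
    record ChildHit(String parentUid, float score) {}

    // Round 1: group matching child docs by parent uid, aggregating scores per parent.
    static Map<String, Float> collectChildScores(Iterable<ChildHit> childHits) {
        Map<String, Float> scoresPerParent = new HashMap<>();
        for (ChildHit hit : childHits) {
            // Math::max stands in for the configured score type (max, sum, avg, ...).
            scoresPerParent.merge(hit.parentUid(), hit.score(), Math::max);
        }
        return scoresPerParent;
    }

    // Round 2: a parent doc is emitted as a hit iff its uid was collected in round 1,
    // scored with the aggregated child score.
    static Float parentScore(Map<String, Float> scoresPerParent, String parentUid) {
        return scoresPerParent.get(parentUid); // null: no matching children, not a hit
    }
}
```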

TopChildrenQuery.java

@@ -25,41 +25,46 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 import org.elasticsearch.ElasticSearchIllegalStateException;
+import org.elasticsearch.common.CacheRecycler;
 import org.elasticsearch.common.bytes.HashedBytesArray;
 import org.elasticsearch.common.lucene.search.EmptyScorer;
+import org.elasticsearch.common.trove.ExtTHashMap;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
 import java.util.*;

 /**
- *
+ * A query that evaluates the top matching child documents (based on the score) in order to determine which
+ * parent documents to return. This query tries to find just enough child documents to return the requested
+ * number of parent documents (or fewer if no more child documents can be found).
+ * <p/>
+ * This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
+ * child documents. The resulting child documents are mapped to their parent documents, including the aggregated child scores.
+ * If not enough parent documents can be resolved, a subsequent round is executed that requests the previously requested
+ * number of documents multiplied by incremental_factor. This repeats until enough parent documents are resolved or until
+ * no more child documents are available.
+ * <p/>
+ * This query is usually faster than the {@link ChildrenQuery}, because enough parent documents can typically be returned
+ * in the first child document query round.
  */
 public class TopChildrenQuery extends Query implements SearchContext.Rewrite {

-    private Query query;
-    private String parentType;
-    private String childType;
-    private ScoreType scoreType;
-    private int factor;
-    private int incrementalFactor;
-    private Map<Object, ParentDoc[]> parentDocs;
-
-    // Actual value can get lost during query rewriting in dfs phase, but this isn't an issue now.
-    private int numHits = 0;
+    private final SearchContext searchContext;
+    private final Query query;
+    private final String parentType;
+    private final String childType;
+    private final ScoreType scoreType;
+    private final int factor;
+    private final int incrementalFactor;

     // Need to know if this query is properly used, otherwise the results are unexpected for example in the count api
-    // Need to use boolean array instead of boolean primitive... b/c during query rewriting in dfs phase
-    private boolean[] properlyInvoked = new boolean[]{false};
+    private boolean properlyInvoked = false;
+
+    private ExtTHashMap<Object, ParentDoc[]> parentDocs;

     // Note, the query is expected to already be filtered to only child type docs
-    public TopChildrenQuery(Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+    public TopChildrenQuery(SearchContext searchContext, Query query, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
+        this.searchContext = searchContext;
         this.query = query;
         this.childType = childType;
         this.parentType = parentType;
@@ -68,54 +73,61 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         this.incrementalFactor = incrementalFactor;
     }

+    private TopChildrenQuery(TopChildrenQuery existing, Query rewrittenChildQuery) {
+        this.searchContext = existing.searchContext;
+        this.query = rewrittenChildQuery;
+        this.parentType = existing.parentType;
+        this.childType = existing.childType;
+        this.scoreType = existing.scoreType;
+        this.factor = existing.factor;
+        this.incrementalFactor = existing.incrementalFactor;
+        this.parentDocs = existing.parentDocs;
+        this.properlyInvoked = existing.properlyInvoked;
+    }
+
     @Override
     public void contextRewrite(SearchContext searchContext) throws Exception {
+        properlyInvoked = true;
+        this.parentDocs = CacheRecycler.popHashMap();
         searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves());
-        int numDocs = (searchContext.from() + searchContext.size());
-        if (numDocs == 0) {
-            numDocs = 1;
+
+        int parentHitsResolved;
+        int numChildDocs = (searchContext.from() + searchContext.size());
+        if (numChildDocs == 0) {
+            numChildDocs = 1;
         }
-        numDocs *= factor;
+        numChildDocs *= factor;
         while (true) {
-            clear();
-//            if (topDocsPhase.scope() != null) {
-//                searchContext.searcher().processingScope(topDocsPhase.scope());
-//            }
-            TopDocs topDocs = searchContext.searcher().search(query, numDocs);
-//            if (topDocsPhase.scope() != null) {
-//                // we mark the scope as processed, so we don't process it again, even if we need to rerun the query...
-//                searchContext.searcher().processedScope();
-//            }
-            processResults(topDocs, searchContext);
+            parentDocs.clear();
+            TopDocs topChildDocs = searchContext.searcher().search(query, numChildDocs);
+            parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext);

             // check if we found enough docs, if so, break
-            if (numHits >= (searchContext.from() + searchContext.size())) {
+            if (parentHitsResolved >= (searchContext.from() + searchContext.size())) {
                 break;
             }
             // if we did not find enough docs, check if it makes sense to search further
-            if (topDocs.totalHits <= numDocs) {
+            if (topChildDocs.totalHits <= numChildDocs) {
                 break;
             }
             // if not, update numDocs, and search again
-            numDocs *= incrementalFactor;
-            if (numDocs > topDocs.totalHits) {
-                numDocs = topDocs.totalHits;
+            numChildDocs *= incrementalFactor;
+            if (numChildDocs > topChildDocs.totalHits) {
+                numChildDocs = topChildDocs.totalHits;
             }
         }
     }

     @Override
     public void contextClear() {
+        if (parentDocs != null) {
+            CacheRecycler.pushHashMap(parentDocs);
+            parentDocs = null;
+        }
     }

-    void clear() {
-        properlyInvoked[0] = true;
-        parentDocs = null;
-        numHits = 0;
-    }
-
-    public void processResults(TopDocs topDocs, SearchContext context) {
+    int resolveParentDocuments(TopDocs topDocs, SearchContext context) {
+        int parentHitsResolved = 0;
         Map<Object, TIntObjectHashMap<ParentDoc>> parentDocsPerReader = new HashMap<Object, TIntObjectHashMap<ParentDoc>>();
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
             int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
@@ -144,7 +156,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
                 ParentDoc parentDoc = readerParentDocs.get(parentDocId);
                 if (parentDoc == null) {
-                    numHits++; // we have a hit on a parent
+                    parentHitsResolved++; // we have a hit on a parent
                     parentDoc = new ParentDoc();
                     parentDoc.docId = parentDocId;
                     parentDoc.count = 1;
@@ -162,12 +174,13 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
             }
         }

-        this.parentDocs = new HashMap<Object, ParentDoc[]>();
         for (Map.Entry<Object, TIntObjectHashMap<ParentDoc>> entry : parentDocsPerReader.entrySet()) {
             ParentDoc[] values = entry.getValue().values(new ParentDoc[entry.getValue().size()]);
             Arrays.sort(values, PARENT_DOC_COMP);
             parentDocs.put(entry.getKey(), values);
         }
+
+        return parentHitsResolved;
     }

     private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();
@@ -179,7 +192,7 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
         }
     }

-    public static class ParentDoc {
+    static class ParentDoc {
         public int docId;
         public int count;
         public float maxScore = Float.NaN;
@@ -188,11 +201,14 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {

     @Override
     public Query rewrite(IndexReader reader) throws IOException {
-        Query newQ = query.rewrite(reader);
-        if (newQ == query) return this;
-        TopChildrenQuery bq = (TopChildrenQuery) this.clone();
-        bq.query = newQ;
-        return bq;
+        Query rewrittenChildQuery = query.rewrite(reader);
+        if (rewrittenChildQuery == query) {
+            return this;
+        }
+        int index = searchContext.rewrites().indexOf(this);
+        TopChildrenQuery rewrite = new TopChildrenQuery(this, rewrittenChildQuery);
+        searchContext.rewrites().set(index, rewrite);
+        return rewrite;
     }

     @Override
@@ -202,14 +218,15 @@ public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
     @Override
     public Weight createWeight(IndexSearcher searcher) throws IOException {
-        if (!properlyInvoked[0]) {
+        if (!properlyInvoked) {
            throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly");
        }

        if (parentDocs != null) {
            return new ParentWeight(searcher, query.createWeight(searcher));
-        } else {
-            return query.createWeight(searcher);
        }
+        return query.createWeight(searcher);
    }

    public String toString(String field) {
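
Taken together, contextRewrite and resolveParentDocuments implement a widening search over child documents. A standalone sketch of just that control flow, with a hypothetical ChildSearcher standing in for the Lucene search plus the parent resolution done above:

```java
// Sketch of the widening loop in contextRewrite; ChildSearcher is a hypothetical
// stand-in for searchContext.searcher().search(...) plus resolveParentDocuments(...).
final class WideningLoopSketch {
    interface ChildSearcher {
        int totalChildHits();                 // analogous to TopDocs.totalHits
        int resolveParents(int numChildDocs); // parents resolved from the top child hits
    }

    static void run(ChildSearcher searcher, int from, int size, int factor, int incrementalFactor) {
        int numChildDocs = Math.max(from + size, 1) * factor; // first round: (from + size) * factor
        while (true) {
            int parentHitsResolved = searcher.resolveParents(numChildDocs);
            if (parentHitsResolved >= from + size) {
                break; // enough parent documents resolved
            }
            if (searcher.totalChildHits() <= numChildDocs) {
                break; // every matching child doc has already been considered
            }
            // widen the request, but never past the total number of matching child docs
            numChildDocs = Math.min(numChildDocs * incrementalFactor, searcher.totalChildHits());
        }
    }
}
```

With from = 0, size = 10, factor = 5, and incrementalFactor = 2, the rounds request 50, then 100, then 200 child documents until at least 10 parent documents are resolved or the matching child documents are exhausted.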