Optimize `has_child` query & filter execution with two short-circuit mechanisms:

* If all parent ids have been emitted as hits, abort the query / filter execution.
* If a relatively small number of parent ids has been collected in the first phase, limit the number of second-phase parent id lookups by putting a short-circuit filter before the parent document evaluation, or, in the case of the filter, by omitting the parent document evaluation altogether. This is controllable via the `short_circuit_cutoff` option, which is exposed in the `has_child` query & filter.

All parent / child queries and filters (except the `top_children` query) abort execution if no parent ids have been collected in the first phase.
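
For illustration, a minimal Java API sketch of setting the cutoff (the type/field names are made up; hasChildQuery and termQuery are the existing QueryBuilders factory methods, setShortCircuitCutoff is added by this change):

import org.elasticsearch.index.query.HasChildQueryBuilder;

import static org.elasticsearch.index.query.QueryBuilders.hasChildQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;

public class ShortCircuitCutoffExample {
    public static HasChildQueryBuilder cutoffQuery() {
        // If the first phase collects at most 4096 parent ids, only those
        // parent docs are evaluated instead of all docs of the parent type.
        return hasChildQuery("child", termQuery("tag", "tag1"))
                .setShortCircuitCutoff(4096);
    }
}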

Closes #3190
Martijn van Groningen, 2013-06-30 20:48:18 +02:00
commit 4d05c9cfd5, parent c222ce28fc
13 changed files with 593 additions and 120 deletions

File: org/elasticsearch/common/lucene/docset/MatchDocIdSet.java

@@ -36,6 +36,8 @@ public abstract class MatchDocIdSet extends DocIdSet implements Bits {
private final int maxDoc;
private final Bits acceptDocs;
private DocIdSetIterator iterator;
protected MatchDocIdSet(int maxDoc, @Nullable Bits acceptDocs) {
this.maxDoc = maxDoc;
this.acceptDocs = acceptDocs;
@@ -49,11 +51,11 @@ public abstract class MatchDocIdSet extends DocIdSet implements Bits {
@Override
public DocIdSetIterator iterator() throws IOException {
if (acceptDocs == null) {
return new NoAcceptDocsIterator(maxDoc);
return iterator = new NoAcceptDocsIterator(maxDoc);
} else if (acceptDocs instanceof FixedBitSet) {
return new FixedBitSetIterator(((DocIdSet) acceptDocs).iterator());
return iterator = new FixedBitSetIterator(((DocIdSet) acceptDocs).iterator());
} else {
return new BothIterator(maxDoc, acceptDocs);
return iterator = new BothIterator(maxDoc, acceptDocs);
}
}
@@ -72,6 +74,17 @@ public abstract class MatchDocIdSet extends DocIdSet implements Bits {
return maxDoc;
}
/**
* Short-circuits the doc id set by advancing beyond the last doc id.
*/
public void shortCircuit() {
try {
iterator.advance(DocIdSetIterator.NO_MORE_DOCS);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
class NoAcceptDocsIterator extends DocIdSetIterator {
private final int maxDoc;
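
To illustrate the new shortCircuit() hook, a hypothetical MatchDocIdSet subclass (names made up, mirroring how HasChildFilter.ParentDocSet further below uses it): once matchDoc knows nothing further can match, it advances the underlying iterator past the last doc id so iteration ends immediately.

import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.MatchDocIdSet;

final class CountingDocIdSet extends MatchDocIdSet {

    private int remaining;

    CountingDocIdSet(int maxDoc, Bits acceptDocs, int remaining) {
        super(maxDoc, acceptDocs);
        this.remaining = remaining;
    }

    @Override
    protected boolean matchDoc(int doc) {
        if (remaining == 0) {
            shortCircuit(); // advances the iterator to NO_MORE_DOCS
            return false;
        }
        boolean match = (doc % 2) == 0; // stand-in for a real match condition
        if (match) {
            remaining--;
        }
        return match;
    }
}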

File: org/elasticsearch/index/mapper/Uid.java

@@ -19,14 +19,15 @@
package org.elasticsearch.index.mapper;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.BytesRefs;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
*
*/
@@ -135,6 +136,12 @@ public final class Uid {
return ref;
}
public static void createUidAsBytes(BytesRef type, BytesRef id, BytesRef spare) {
spare.copyBytes(type);
spare.append(DELIMITER_BYTES);
spare.append(id);
}
public static BytesRef[] createTypeUids(Collection<String> types, Object ids) {
return createTypeUids(types, Collections.singletonList(ids));
}
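
A small sketch of the new spare-reusing overload: it fills an existing BytesRef with type#id ('#' being the uid delimiter), so repeated per-id lookups don't allocate a fresh uid each time.

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.Uid;

public class UidSpareExample {
    public static void main(String[] args) {
        BytesRef type = new BytesRef("parent");
        BytesRef spare = new BytesRef();
        for (String id : new String[]{"1", "2", "3"}) {
            // Fills 'spare' with "parent#<id>" without allocating a new uid.
            Uid.createUidAsBytes(type, new BytesRef(id), spare);
            System.out.println(spare.utf8ToString());
        }
    }
}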

File: org/elasticsearch/index/query/HasChildFilterBuilder.java

@@ -34,6 +34,7 @@ public class HasChildFilterBuilder extends BaseFilterBuilder {
private String filterName;
private Boolean cache;
private String cacheKey;
private Integer shortCircuitCutoff;
public HasChildFilterBuilder(String type, QueryBuilder queryBuilder) {
this.childType = type;
@@ -72,6 +73,15 @@ public class HasChildFilterBuilder extends BaseFilterBuilder {
return this;
}
/**
* Configures the cutoff point: if at most this many parent ids are collected in the first phase, only parent documents
* that contain the matching parent id terms are evaluated, instead of evaluating all parent docs.
*/
public HasChildFilterBuilder setShortCircuitCutoff(int shortCircuitCutoff) {
this.shortCircuitCutoff = shortCircuitCutoff;
return this;
}
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(HasChildFilterParser.NAME);
@@ -92,6 +102,9 @@ public class HasChildFilterBuilder extends BaseFilterBuilder {
if (cacheKey != null) {
builder.field("_cache_key", cacheKey);
}
if (shortCircuitCutoff != null) {
builder.field("short_circuit_cutoff", shortCircuitCutoff);
}
builder.endObject();
}
}
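
The filter-side equivalent of the query example in the commit message, hedged the same way (hasChildFilter and filteredQuery are existing FilterBuilders/QueryBuilders factory methods; setShortCircuitCutoff is the method added above):

import org.elasticsearch.index.query.QueryBuilder;

import static org.elasticsearch.index.query.FilterBuilders.hasChildFilter;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;

public class ShortCircuitFilterExample {
    public static QueryBuilder cutoffFilterQuery() {
        // At or below the cutoff the filter swaps the full parent-type
        // evaluation for a filter over just the collected parent ids.
        return filteredQuery(matchAllQuery(),
                hasChildFilter("child", termQuery("tag", "tag1"))
                        .setShortCircuitCutoff(1024));
    }
}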

File: org/elasticsearch/index/query/HasChildFilterParser.java

@@ -57,6 +57,7 @@ public class HasChildFilterParser implements FilterParser {
Query query = null;
boolean queryFound = false;
String childType = null;
int shortCircuitParentDocSet = 8192; // Tests show a cutoff point between 8192 and 16384.
boolean cache = false;
CacheKeyFilter.Key cacheKey = null;
@@ -101,6 +102,8 @@ public class HasChildFilterParser implements FilterParser {
cache = parser.booleanValue();
} else if ("_cache_key".equals(currentFieldName) || "_cacheKey".equals(currentFieldName)) {
cacheKey = new CacheKeyFilter.Key(parser.text());
} else if ("short_circuit_cutoff".equals(currentFieldName)) {
shortCircuitParentDocSet = parser.intValue();
} else {
throw new QueryParsingException(parseContext.index(), "[has_child] filter does not support [" + currentFieldName + "]");
}
@@ -139,7 +142,7 @@ public class HasChildFilterParser implements FilterParser {
}
Filter parentFilter = parseContext.cacheFilter(parentDocMapper.typeFilter(), null);
HasChildFilter childFilter = new HasChildFilter(query, parentType, childType, parentFilter, searchContext);
HasChildFilter childFilter = new HasChildFilter(query, parentType, childType, parentFilter, searchContext, shortCircuitParentDocSet);
searchContext.addRewrite(childFilter);
Filter filter = childFilter;

File: org/elasticsearch/index/query/HasChildQueryBuilder.java

@@ -36,6 +36,8 @@ public class HasChildQueryBuilder extends BaseQueryBuilder implements BoostableQ
private String scoreType;
private Integer shortCircuitCutoff;
public HasChildQueryBuilder(String type, QueryBuilder queryBuilder) {
this.childType = type;
this.queryBuilder = queryBuilder;
@@ -58,6 +60,15 @@ public class HasChildQueryBuilder extends BaseQueryBuilder implements BoostableQ
return this;
}
/**
* Configures the cutoff point: if at most this many parent ids are collected in the first phase, only parent documents
* that contain the matching parent id terms are evaluated, instead of evaluating all parent docs.
*/
public HasChildQueryBuilder setShortCircuitCutoff(int shortCircuitCutoff) {
this.shortCircuitCutoff = shortCircuitCutoff;
return this;
}
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(HasChildQueryParser.NAME);
@@ -70,6 +81,9 @@ public class HasChildQueryBuilder extends BaseQueryBuilder implements BoostableQ
if (scoreType != null) {
builder.field("score_type", scoreType);
}
if (shortCircuitCutoff != null) {
builder.field("short_circuit_cutoff", shortCircuitCutoff);
}
builder.endObject();
}
}

File: org/elasticsearch/index/query/HasChildQueryParser.java

@@ -59,6 +59,7 @@ public class HasChildQueryParser implements QueryParser {
float boost = 1.0f;
String childType = null;
ScoreType scoreType = null;
int shortCircuitParentDocSet = 8192;
String currentFieldName = null;
XContentParser.Token token;
@@ -96,6 +97,8 @@ public class HasChildQueryParser implements QueryParser {
}
} else if ("boost".equals(currentFieldName)) {
boost = parser.floatValue();
} else if ("short_circuit_cutoff".equals(currentFieldName)) {
shortCircuitParentDocSet = parser.intValue();
} else {
throw new QueryParsingException(parseContext.index(), "[has_child] query does not support [" + currentFieldName + "]");
}
@@ -135,11 +138,11 @@ public class HasChildQueryParser implements QueryParser {
Query query;
Filter parentFilter = parseContext.cacheFilter(parentDocMapper.typeFilter(), null);
if (scoreType != null) {
ChildrenQuery childrenQuery = new ChildrenQuery(searchContext, parentType, childType, parentFilter, innerQuery, scoreType);
ChildrenQuery childrenQuery = new ChildrenQuery(searchContext, parentType, childType, parentFilter, innerQuery, scoreType, shortCircuitParentDocSet);
searchContext.addRewrite(childrenQuery);
query = childrenQuery;
} else {
HasChildFilter hasChildFilter = new HasChildFilter(innerQuery, parentType, childType, parentFilter, searchContext);
HasChildFilter hasChildFilter = new HasChildFilter(innerQuery, parentType, childType, parentFilter, searchContext, shortCircuitParentDocSet);
searchContext.addRewrite(hasChildFilter);
query = new XConstantScoreQuery(hasChildFilter);
}

File: org/elasticsearch/index/search/child/ChildrenQuery.java

@@ -19,8 +19,6 @@
package org.elasticsearch.index.search.child;
import gnu.trove.map.TObjectFloatMap;
import gnu.trove.map.TObjectIntMap;
import gnu.trove.map.hash.TObjectFloatHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;
import org.apache.lucene.index.AtomicReaderContext;
@@ -28,12 +26,16 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.index.cache.id.IdReaderTypeCache;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
@@ -57,18 +59,20 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
private final Filter parentFilter;
private final ScoreType scoreType;
private final Query originalChildQuery;
private final int shortCircuitParentDocSet;
private Query rewrittenChildQuery;
private TObjectFloatHashMap<HashedBytesArray> uidToScore;
private TObjectIntHashMap<HashedBytesArray> uidToCount;
public ChildrenQuery(SearchContext searchContext, String parentType, String childType, Filter parentFilter, Query childQuery, ScoreType scoreType) {
public ChildrenQuery(SearchContext searchContext, String parentType, String childType, Filter parentFilter, Query childQuery, ScoreType scoreType, int shortCircuitParentDocSet) {
this.searchContext = searchContext;
this.parentType = parentType;
this.childType = childType;
this.parentFilter = new ApplyAcceptedDocsFilter(parentFilter);
this.originalChildQuery = childQuery;
this.scoreType = scoreType;
this.shortCircuitParentDocSet = shortCircuitParentDocSet;
}
@Override
@@ -160,15 +164,33 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
throw new ElasticSearchIllegalStateException("has_child query hasn't executed properly");
}
return new ParentWeight(rewrittenChildQuery.createWeight(searcher));
int size = uidToScore.size();
if (size == 0) {
return Queries.NO_MATCH_QUERY.createWeight(searcher);
}
Filter parentFilter;
if (size == 1) {
BytesRef id = uidToScore.keySet().iterator().next().toBytesRef();
parentFilter = new TermFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(parentType, id)));
} else if (size <= shortCircuitParentDocSet) {
parentFilter = new ParentIdsFilter(parentType, uidToScore.keySet());
} else {
parentFilter = this.parentFilter;
}
return new ParentWeight(rewrittenChildQuery.createWeight(searcher), parentFilter, size);
}
class ParentWeight extends Weight {
final class ParentWeight extends Weight {
final Weight childWeight;
final Filter parentFilter;
int remaining;
public ParentWeight(Weight childWeight) {
public ParentWeight(Weight childWeight, Filter parentFilter, int remaining) {
this.childWeight = childWeight;
this.parentFilter = parentFilter;
this.remaining = remaining;
}
@Override
@@ -195,7 +217,7 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
if (parentsSet == null || parentsSet == DocIdSet.EMPTY_DOCIDSET) {
if (parentsSet == null || parentsSet == DocIdSet.EMPTY_DOCIDSET || remaining == 0) {
return null;
}
@@ -203,51 +225,73 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
DocIdSetIterator parentsIterator = parentsSet.iterator();
switch (scoreType) {
case AVG:
return new AvgParentScorer(this, idTypeCache, uidToScore, uidToCount, parentsIterator);
return new AvgParentScorer(this, idTypeCache, parentsIterator);
default:
return new ParentScorer(this, idTypeCache, uidToScore, parentsIterator);
return new ParentScorer(this, idTypeCache, parentsIterator);
}
}
}
class ParentScorer extends Scorer {
static class ParentScorer extends Scorer {
final IdReaderTypeCache idTypeCache;
final DocIdSetIterator parentsIterator;
final IdReaderTypeCache idTypeCache;
final TObjectFloatMap<HashedBytesArray> uidToScore;
final DocIdSetIterator parentsIterator;
int remaining;
int currentDocId = -1;
float currentScore;
int currentDocId = -1;
float currentScore;
ParentScorer(Weight weight, IdReaderTypeCache idTypeCache, DocIdSetIterator parentsIterator) {
super(weight);
this.idTypeCache = idTypeCache;
this.parentsIterator = parentsIterator;
this.remaining = uidToScore.size();
}
ParentScorer(Weight weight, IdReaderTypeCache idTypeCache, TObjectFloatMap<HashedBytesArray> uidToScore, DocIdSetIterator parentsIterator) {
super(weight);
this.idTypeCache = idTypeCache;
this.uidToScore = uidToScore;
this.parentsIterator = parentsIterator;
}
@Override
public float score() throws IOException {
return currentScore;
}
@Override
public float score() throws IOException {
return currentScore;
}
@Override
public int freq() throws IOException {
// We don't have the original child query hit info here...
// But the freq of the children could be collected and returned here, but that would make this Scorer more expensive.
return 1;
}
@Override
public int freq() throws IOException {
// We don't have the original child query hit info here...
// But the freq of the children could be collected and returned here, but that would make this Scorer more expensive.
return 1;
}
@Override
public int docID() {
return currentDocId;
}
@Override
public int docID() {
return currentDocId;
}
@Override
public int nextDoc() throws IOException {
if (remaining == 0) {
return NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
while (true) {
currentDocId = parentsIterator.nextDoc();
while (true) {
currentDocId = parentsIterator.nextDoc();
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
HashedBytesArray uid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(uid);
if (currentScore != 0) {
remaining--;
return currentDocId;
}
}
}
@Override
public int advance(int target) throws IOException {
if (remaining == 0) {
return NO_MORE_DOCS;
}
currentDocId = parentsIterator.advance(target);
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
@@ -255,76 +299,64 @@ public class ChildrenQuery extends Query implements SearchContext.Rewrite {
HashedBytesArray uid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(uid);
if (currentScore != 0) {
remaining--;
return currentDocId;
} else {
return nextDoc();
}
}
}
@Override
public int advance(int target) throws IOException {
currentDocId = parentsIterator.advance(target);
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
HashedBytesArray uid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(uid);
if (currentScore != 0) {
return currentDocId;
} else {
return nextDoc();
@Override
public long cost() {
return parentsIterator.cost();
}
}
@Override
public long cost() {
return parentsIterator.cost();
}
}
final class AvgParentScorer extends ParentScorer {
static class AvgParentScorer extends ParentScorer {
HashedBytesArray currentUid;
final TObjectIntMap<HashedBytesArray> uidToCount;
HashedBytesArray currentUid;
AvgParentScorer(Weight weight, IdReaderTypeCache idTypeCache, DocIdSetIterator parentsIterator) {
super(weight, idTypeCache, parentsIterator);
}
AvgParentScorer(Weight weight, IdReaderTypeCache idTypeCache, TObjectFloatMap<HashedBytesArray> uidToScore, TObjectIntMap<HashedBytesArray> uidToCount, DocIdSetIterator parentsIterator) {
super(weight, idTypeCache, uidToScore, parentsIterator);
this.uidToCount = uidToCount;
}
@Override
public int nextDoc() throws IOException {
while (true) {
currentDocId = parentsIterator.nextDoc();
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
@Override
public int nextDoc() throws IOException {
while (true) {
currentDocId = parentsIterator.nextDoc();
currentUid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(currentUid);
if (currentScore != 0) {
remaining--;
currentScore /= uidToCount.get(currentUid);
return currentDocId;
}
}
}
@Override
public int advance(int target) throws IOException {
currentDocId = parentsIterator.advance(target);
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
currentUid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(currentUid);
HashedBytesArray uid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(uid);
if (currentScore != 0) {
remaining--;
currentScore /= uidToCount.get(currentUid);
return currentDocId;
} else {
return nextDoc();
}
}
}
@Override
public int advance(int target) throws IOException {
currentDocId = parentsIterator.advance(target);
if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) {
return currentDocId;
}
HashedBytesArray uid = idTypeCache.idByDoc(currentDocId);
currentScore = uidToScore.get(uid);
if (currentScore != 0) {
currentScore /= uidToCount.get(currentUid);
return currentDocId;
} else {
return nextDoc();
}
}
}
static class ChildUidCollector extends ParentIdCollector {
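
To restate the three-way selection in createWeight above in one place, a simplified sketch (not the committed code; ParentFilterSelection is a made-up name, and the zero-uid case is handled earlier by returning a no-match weight):

import java.util.Set;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;

// Assumes the same package as ParentIdsFilter, which is package-private.
class ParentFilterSelection {

    static Filter select(Set<HashedBytesArray> uids, int cutoff,
                         String parentType, Filter fullParentTypeFilter) {
        int size = uids.size();
        if (size == 1) {
            // One parent id: a plain term filter on _uid is cheapest.
            BytesRef id = uids.iterator().next().toBytesRef();
            return new TermFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(parentType, id)));
        } else if (size <= cutoff) {
            // Few parent ids: seek exactly those uids in the terms dictionary.
            return new ParentIdsFilter(parentType, uids);
        } else {
            // Many parent ids: fall back to evaluating all parent-type docs.
            return fullParentTypeFilter;
        }
    }
}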

File: org/elasticsearch/index/search/child/HasChildFilter.java

@@ -22,15 +22,21 @@ package org.elasticsearch.index.search.child;
import gnu.trove.set.hash.THashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import org.elasticsearch.common.lucene.docset.MatchDocIdSet;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.index.cache.id.IdReaderTypeCache;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
@@ -45,15 +51,19 @@ public class HasChildFilter extends Filter implements SearchContext.Rewrite {
final String childType;
final Filter parentFilter;
final SearchContext searchContext;
final int shortCircuitParentDocSet;
Filter shortCircuitFilter;
int remaining;
THashSet<HashedBytesArray> collectedUids;
public HasChildFilter(Query childQuery, String parentType, String childType, Filter parentFilter, SearchContext searchContext) {
public HasChildFilter(Query childQuery, String parentType, String childType, Filter parentFilter, SearchContext searchContext, int shortCircuitParentDocSet) {
this.parentFilter = parentFilter;
this.searchContext = searchContext;
this.parentType = parentType;
this.childType = childType;
this.childQuery = childQuery;
this.shortCircuitParentDocSet = shortCircuitParentDocSet;
}
@Override
@@ -93,6 +103,12 @@ public class HasChildFilter extends Filter implements SearchContext.Rewrite {
if (collectedUids == null) {
throw new ElasticSearchIllegalStateException("has_child filter hasn't executed properly");
}
if (remaining == 0) {
return null;
}
if (shortCircuitFilter != null) {
return shortCircuitFilter.getDocIdSet(context, acceptDocs);
}
DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, null);
if (DocIdSets.isEmpty(parentDocIdSet)) {
@@ -114,6 +130,15 @@ public class HasChildFilter extends Filter implements SearchContext.Rewrite {
collectedUids = searchContext.cacheRecycler().popHashSet();
UidCollector collector = new UidCollector(parentType, searchContext, collectedUids);
searchContext.searcher().search(childQuery, collector);
remaining = collectedUids.size();
if (remaining == 0) {
shortCircuitFilter = Queries.MATCH_NO_FILTER;
} else if (remaining == 1) {
BytesRef id = collectedUids.iterator().next().toBytesRef();
shortCircuitFilter = new TermFilter(new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(parentType, id)));
} else if (remaining <= shortCircuitParentDocSet) {
shortCircuitFilter = new ParentIdsFilter(parentType, collectedUids);
}
}
@Override
@@ -122,9 +147,10 @@ public class HasChildFilter extends Filter implements SearchContext.Rewrite {
searchContext.cacheRecycler().pushHashSet(collectedUids);
}
collectedUids = null;
shortCircuitFilter = null;
}
final static class ParentDocSet extends MatchDocIdSet {
final class ParentDocSet extends MatchDocIdSet {
final IndexReader reader;
final THashSet<HashedBytesArray> parents;
@@ -139,7 +165,16 @@ public class HasChildFilter extends Filter implements SearchContext.Rewrite {
@Override
protected boolean matchDoc(int doc) {
return parents.contains(typeCache.idByDoc(doc));
if (remaining == 0) {
shortCircuit();
return false;
}
boolean match = parents.contains(typeCache.idByDoc(doc));
if (match) {
remaining--;
}
return match;
}
}

File: org/elasticsearch/index/search/child/HasParentFilter.java

@@ -78,6 +78,9 @@ public class HasParentFilter extends Filter implements SearchContext.Rewrite {
if (parents == null) {
throw new ElasticSearchIllegalStateException("has_parent filter hasn't executed properly");
}
if (parents.isEmpty()) {
return null;
}
DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(readerContext, null);
if (DocIdSets.isEmpty(childrenDocIdSet)) {

File: org/elasticsearch/index/search/child/ParentIdsFilter.java (new file)

@@ -0,0 +1,92 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search.child;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.Set;
/**
* Advantages of using this filter over Lucene's TermsFilter in the parent/child context:
* 1) We don't need to copy all values over to a list from the id cache and then
* copy all the id values over to one continuous byte array. This should save a lot of object creations and GCs.
* 2) We filter docs by one field only.
* 3) We can directly reference values that originate from the id cache.
*/
final class ParentIdsFilter extends Filter {
private final BytesRef parentTypeBr;
private final Set<HashedBytesArray> collectedUids;
public ParentIdsFilter(String parentType, Set<HashedBytesArray> collectedUids) {
this.parentTypeBr = new BytesRef(parentType);
this.collectedUids = collectedUids;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(UidFieldMapper.NAME);
if (terms == null) {
return null;
}
TermsEnum termsEnum = terms.iterator(null);
BytesRef uidSpare = new BytesRef();
BytesRef idSpare = new BytesRef();
DocsEnum docsEnum = null;
FixedBitSet result = null;
for (HashedBytesArray parentId : collectedUids) {
idSpare.bytes = parentId.toBytes();
idSpare.length = idSpare.bytes.length;
Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
if (termsEnum.seekExact(uidSpare, false)) {
int docId;
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
if (result == null) {
docId = docsEnum.nextDoc();
if (docId != DocIdSetIterator.NO_MORE_DOCS) {
result = new FixedBitSet(context.reader().maxDoc());
result.set(docId);
} else {
continue;
}
}
for (docId = docsEnum.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
result.set(docId);
}
}
}
return result;
}
}
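
For contrast with the javadoc above, the TermsFilter route this class avoids would first copy every uid into a Term list (a sketch assuming Lucene 4.x org.apache.lucene.queries.TermsFilter):

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;

public class TermsFilterAlternative {
    // Every collected uid is copied into a Term (and TermsFilter copies the
    // bytes again internally) -- exactly the allocations ParentIdsFilter avoids.
    static Filter toTermsFilter(String parentType, Set<HashedBytesArray> uids) {
        List<Term> terms = new ArrayList<Term>(uids.size());
        for (HashedBytesArray id : uids) {
            BytesRef uid = Uid.createUidAsBytes(parentType, id.toBytesRef());
            terms.add(new Term(UidFieldMapper.NAME, uid));
        }
        return new TermsFilter(terms);
    }
}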

File: org/elasticsearch/index/search/child/ParentQuery.java

@@ -31,6 +31,7 @@ import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
import org.elasticsearch.common.lucene.search.NoopCollector;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.cache.id.IdReaderTypeCache;
import org.elasticsearch.search.internal.SearchContext;
@@ -136,6 +137,10 @@ public class ParentQuery extends Query implements SearchContext.Rewrite {
if (uidToScore == null) {
throw new ElasticSearchIllegalStateException("has_parent query hasn't executed properly");
}
if (uidToScore.isEmpty()) {
return Queries.NO_MATCH_QUERY.createWeight(searcher);
}
return new ChildWeight(rewrittenParentQuery.createWeight(searcher));
}

File: org/elasticsearch/benchmark/search/child/ChildSearchBenchmark.java

@@ -42,7 +42,7 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.hasChildFilter;
import static org.elasticsearch.index.query.FilterBuilders.hasParentFilter;
import static org.elasticsearch.index.query.FilterBuilders.rangeFilter;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
@@ -59,10 +59,10 @@ public class ChildSearchBenchmark {
.put(SETTING_NUMBER_OF_REPLICAS, 0)
.build();
Node node1 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node1")).node();
Node node1 = nodeBuilder().clusterName("classic").settings(settingsBuilder().put(settings).put("name", "node1")).node();
Client client = node1.client();
long COUNT = SizeValue.parseSizeValue("1m").singles();
long COUNT = SizeValue.parseSizeValue("10m").singles();
int CHILD_COUNT = 5;
int BATCH = 100;
int QUERY_WARMUP = 20;
@@ -88,11 +88,11 @@ public class ChildSearchBenchmark {
for (int j = 0; j < BATCH; j++) {
counter++;
request.add(Requests.indexRequest(indexName).type("parent").id(Integer.toString(counter))
.source(parentSource(Integer.toString(counter), "test" + counter)));
.source(parentSource(counter, "test" + counter)));
for (int k = 0; k < CHILD_COUNT; k++) {
request.add(Requests.indexRequest(indexName).type("child").id(Integer.toString(counter) + "_" + k)
.parent(Integer.toString(counter))
.source(childSource(Integer.toString(counter), "tag" + k)));
.source(childSource(counter, "tag" + k)));
}
}
BulkResponse response = request.execute().actionGet();
@@ -113,12 +113,12 @@ public class ChildSearchBenchmark {
}
}
client.admin().indices().prepareRefresh().execute().actionGet();
System.out.println("--> Number of docs in index: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount());
System.out.println("--> Number of docs in index: " + client.prepareCount(indexName).setQuery(matchAllQuery()).execute().actionGet().getCount());
System.out.println("--> Running just child query");
// run just the child query, warm up first
for (int j = 0; j < QUERY_WARMUP; j++) {
SearchResponse searchResponse = client.prepareSearch().setQuery(termQuery("child.tag", "tag1")).execute().actionGet();
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(termQuery("child.tag", "tag1")).execute().actionGet();
if (j == 0) {
System.out.println("--> Warmup took: " + searchResponse.getTook());
}
@@ -141,7 +141,7 @@ public class ChildSearchBenchmark {
.setJvm(true).execute().actionGet();
System.out.println("--> Committed heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapCommitted());
System.out.println("--> Used heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapUsed());
// run parent child constant query
for (int j = 0; j < QUERY_WARMUP; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName)
@@ -202,7 +202,33 @@ public class ChildSearchBenchmark {
}
System.out.println("--> has_child filter with match_all child query, Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
// run parent child constant query
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(
filteredQuery(matchAllQuery(), hasChildFilter("child", termQuery("id", Integer.toString(j + 1))))
).execute().actionGet();
long expected = 1;
if (searchResponse.getHits().totalHits() != expected) {
System.err.println("mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child filter with single parent match Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
double expected = Math.pow((j + 1), 3) * CHILD_COUNT;
SearchResponse searchResponse = client.prepareSearch(indexName)
.setQuery(filteredQuery(matchAllQuery(), hasChildFilter("child", constantScoreQuery(rangeFilter("num").lte(expected)))))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != expected) {
System.err.println("mismatch on hits: " + searchResponse.getHits().totalHits() + " != " + expected);
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child filter with exponential parent results Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
/*// run parent child constant query
for (int j = 0; j < QUERY_WARMUP; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName)
.setQuery(
@@ -298,7 +324,7 @@ public class ChildSearchBenchmark {
// }
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> top_children, with match_all Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
System.out.println("--> top_children, with match_all Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");*/
statsResponse = client.admin().cluster().prepareNodesStats()
.setJvm(true).setIndices(true).execute().actionGet();
@@ -324,7 +350,7 @@ public class ChildSearchBenchmark {
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(hasChildQuery("child", matchAllQuery()).scoreType("max")).execute().actionGet();
@@ -335,8 +361,30 @@ public class ChildSearchBenchmark {
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child query with match_all Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
System.out.println("--> Running has_parent query with score type");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(hasChildQuery("child", termQuery("id", Integer.toString(j + 1))).scoreType("max")).execute().actionGet();
long expected = 1;
if (searchResponse.getHits().totalHits() != expected) {
System.err.println("mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child query with single parent match Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
double expected = Math.pow((j + 1), 3) * CHILD_COUNT;
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(hasChildQuery("child", constantScoreQuery(rangeFilter("num").lte(expected))).scoreType("max")).execute().actionGet();
if (searchResponse.getHits().totalHits() != expected) {
System.err.println("mismatch on hits: " + searchResponse.getHits().totalHits() + " != " + expected);
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child query with exponential parent results Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
/*System.out.println("--> Running has_parent query with score type");
// run parent child score query
for (int j = 0; j < QUERY_WARMUP; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(hasParentQuery("parent", termQuery("name", "test1")).scoreType("score")).execute().actionGet();
@@ -363,8 +411,7 @@ public class ChildSearchBenchmark {
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_parent query with match_all Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
System.out.println("--> has_parent query with match_all Query Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");*/
System.gc();
statsResponse = client.admin().cluster().prepareNodesStats()
@@ -377,11 +424,11 @@ public class ChildSearchBenchmark {
node1.close();
}
private static XContentBuilder parentSource(String id, String nameValue) throws IOException {
return jsonBuilder().startObject().field("id", id).field("name", nameValue).endObject();
private static XContentBuilder parentSource(int id, String nameValue) throws IOException {
return jsonBuilder().startObject().field("id", Integer.toString(id)).field("num", id).field("name", nameValue).endObject();
}
private static XContentBuilder childSource(String id, String tag) throws IOException {
return jsonBuilder().startObject().field("id", id).field("tag", tag).endObject();
private static XContentBuilder childSource(int id, String tag) throws IOException {
return jsonBuilder().startObject().field("id", Integer.toString(id)).field("num", id).field("tag", tag).endObject();
}
}

File: org/elasticsearch/benchmark/search/child/ChildSearchShortCircuitBenchmark.java (new file)

@@ -0,0 +1,206 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.search.child;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;
import java.io.IOException;
import java.util.Arrays;
import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.hasChildFilter;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
/**
*
*/
public class ChildSearchShortCircuitBenchmark {
public static void main(String[] args) throws Exception {
Settings settings = settingsBuilder()
.put("index.engine.robin.refreshInterval", "-1")
.put("gateway.type", "local")
.put(SETTING_NUMBER_OF_SHARDS, 1)
.put(SETTING_NUMBER_OF_REPLICAS, 0)
.build();
Node node1 = nodeBuilder().clusterName("bench1").settings(settingsBuilder().put(settings).put("name", "node1")).node();
Client client = node1.client();
long PARENT_COUNT = SizeValue.parseSizeValue("10M").singles();
int BATCH = 100;
int QUERY_WARMUP = 5;
int QUERY_COUNT = 25;
String indexName = "test";
client.admin().cluster().prepareHealth(indexName).setWaitForGreenStatus().setTimeout("10s").execute().actionGet();
try {
client.admin().indices().create(createIndexRequest(indexName)).actionGet();
client.admin().indices().preparePutMapping(indexName).setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_parent").field("type", "parent").endObject()
.endObject().endObject()).execute().actionGet();
Thread.sleep(5000);
StopWatch stopWatch = new StopWatch().start();
System.out.println("--> Indexing [" + PARENT_COUNT + "] parent document and some child documents");
long ITERS = PARENT_COUNT / BATCH;
int i = 1;
int counter = 0;
for (; i <= ITERS; i++) {
BulkRequestBuilder request = client.prepareBulk();
for (int j = 0; j < BATCH; j++) {
counter++;
request.add(Requests.indexRequest(indexName).type("parent").id(Integer.toString(counter))
.source(parentSource(counter)));
}
BulkResponse response = request.execute().actionGet();
if (response.hasFailures()) {
System.err.println("--> failures...");
}
if (((i * BATCH) % 10000) == 0) {
System.out.println("--> Indexed " + (i * BATCH) + "parent docs; took " + stopWatch.stop().lastTaskTime());
stopWatch.start();
}
}
int id = 0;
for (i = 1; i <= PARENT_COUNT; i *= 2) {
int parentId = 1;
for (int j = 0; j < i; j++) {
client.prepareIndex(indexName, "child", Integer.toString(id++))
.setParent(Integer.toString(parentId++))
.setSource(childSource(i))
.execute().actionGet();
}
}
System.out.println("--> Indexing took " + stopWatch.totalTime());
} catch (Exception e) {
System.out.println("--> Index already exists, ignoring indexing phase, waiting for green");
ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth(indexName).setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
if (clusterHealthResponse.isTimedOut()) {
System.err.println("--> Timed out waiting for cluster health");
}
}
client.admin().indices().prepareRefresh().execute().actionGet();
System.out.println("--> Number of docs in index: " + client.prepareCount(indexName).setQuery(matchAllQuery()).execute().actionGet().getCount());
System.out.println("--> Running just child query");
// run just the child query, warm up first
for (int i = 1; i <= 10000; i *= 2) {
SearchResponse searchResponse = client.prepareSearch(indexName).setQuery(matchQuery("child.field2", i)).execute().actionGet();
System.out.println("--> Warmup took["+ i +"]: " + searchResponse.getTook());
if (searchResponse.getHits().totalHits() != i) {
System.err.println("--> mismatch on hits");
}
}
NodesStatsResponse statsResponse = client.admin().cluster().prepareNodesStats()
.setJvm(true).execute().actionGet();
System.out.println("--> Committed heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapCommitted());
System.out.println("--> Used heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapUsed());
// run parent child constant query
for (int j = 1; j < QUERY_WARMUP; j *= 2) {
SearchResponse searchResponse = client.prepareSearch(indexName)
.setQuery(
hasChildQuery("child", matchQuery("field2", j))
)
.execute().actionGet();
if (searchResponse.getFailedShards() > 0) {
System.err.println("Search Failures " + Arrays.toString(searchResponse.getShardFailures()));
}
if (searchResponse.getHits().totalHits() != j) {
System.err.println("--> mismatch on hits [" + j + "], got [" + searchResponse.getHits().totalHits() + "], expected [" + PARENT_COUNT + "]");
}
}
long totalQueryTime = 0;
for (int i = 1; i < PARENT_COUNT; i *= 2) {
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName)
.setQuery(filteredQuery(matchAllQuery(), hasChildFilter("child", matchQuery("field2", i))))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != i) {
System.err.println("--> mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child filter " + i +" Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
}
statsResponse = client.admin().cluster().prepareNodesStats()
.setJvm(true).setIndices(true).execute().actionGet();
System.out.println("--> Id cache size: " + statsResponse.getNodes()[0].getIndices().getIdCache().getMemorySize());
System.out.println("--> Used heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapUsed());
totalQueryTime = 0;
for (int i = 1; i < PARENT_COUNT; i *= 2) {
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch(indexName)
.setQuery(hasChildQuery("child", matchQuery("field2", i)).scoreType("max"))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != i) {
System.err.println("--> mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> has_child query " + i +" Avg: " + (totalQueryTime / QUERY_COUNT) + "ms");
}
System.gc();
statsResponse = client.admin().cluster().prepareNodesStats()
.setJvm(true).setIndices(true).execute().actionGet();
System.out.println("--> Id cache size: " + statsResponse.getNodes()[0].getIndices().getIdCache().getMemorySize());
System.out.println("--> Used heap size: " + statsResponse.getNodes()[0].getJvm().getMem().getHeapUsed());
client.close();
node1.close();
}
private static XContentBuilder parentSource(int val) throws IOException {
return jsonBuilder().startObject().field("field1", Integer.toString(val)).endObject();
}
private static XContentBuilder childSource(int val) throws IOException {
return jsonBuilder().startObject().field("field2", Integer.toString(val)).endObject();
}
}