inner_hits: Replace `NestedChildrenQuery` with `ParentChildrenBlockJoinQuery`.

Closes #24009
This commit is contained in:
Martijn van Groningen 2017-04-10 14:03:16 +02:00
parent 53d4d747a6
commit 887f3ed8dc
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
2 changed files with 4 additions and 223 deletions

View File

@ -19,26 +19,18 @@
package org.elasticsearch.search.fetch.subphase;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.search.join.ParentChildrenBlockJoinQuery;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
@ -48,9 +40,9 @@ import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.mapper.ParentFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.UidFieldMapper;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.SubSearchContext;
@ -131,7 +123,8 @@ public final class InnerHitsContext {
}
BitSetProducer parentFilter = context.bitsetFilterCache().getBitSetProducer(rawParentFilter);
Query childFilter = childObjectMapper.nestedTypeFilter();
Query q = Queries.filtered(query(), new NestedChildrenQuery(parentFilter, childFilter, hitContext));
int parentDocId = hitContext.readerContext().docBase + hitContext.docId();
Query q = Queries.filtered(query(), new ParentChildrenBlockJoinQuery(parentFilter, childFilter, parentDocId));
if (size() == 0) {
return new TopDocs(context.searcher().count(q), Lucene.EMPTY_SCORE_DOCS, 0);
@ -156,120 +149,6 @@ public final class InnerHitsContext {
}
}
// A filter that only emits the nested children docs of a specific nested parent doc
static class NestedChildrenQuery extends Query {
private final BitSetProducer parentFilter;
private final Query childFilter;
private final int docId;
private final LeafReader leafReader;
NestedChildrenQuery(BitSetProducer parentFilter, Query childFilter, FetchSubPhase.HitContext hitContext) {
this.parentFilter = parentFilter;
this.childFilter = childFilter;
this.docId = hitContext.docId();
this.leafReader = hitContext.readerContext().reader();
}
@Override
public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
NestedChildrenQuery other = (NestedChildrenQuery) obj;
return parentFilter.equals(other.parentFilter)
&& childFilter.equals(other.childFilter)
&& docId == other.docId
&& leafReader.getCoreCacheKey() == other.leafReader.getCoreCacheKey();
}
@Override
public int hashCode() {
int hash = classHash();
hash = 31 * hash + parentFilter.hashCode();
hash = 31 * hash + childFilter.hashCode();
hash = 31 * hash + docId;
hash = 31 * hash + leafReader.getCoreCacheKey().hashCode();
return hash;
}
@Override
public String toString(String field) {
return "NestedChildren(parent=" + parentFilter + ",child=" + childFilter + ")";
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final Weight childWeight = childFilter.createWeight(searcher, false);
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
// Nested docs only reside in a single segment, so no need to evaluate all segments
if (!context.reader().getCoreCacheKey().equals(leafReader.getCoreCacheKey())) {
return null;
}
// If docId == 0 then we a parent doc doesn't have child docs, because child docs are stored
// before the parent doc and because parent doc is 0 we can safely assume that there are no child docs.
if (docId == 0) {
return null;
}
final BitSet parents = parentFilter.getBitSet(context);
final int firstChildDocId = parents.prevSetBit(docId - 1) + 1;
// A parent doc doesn't have child docs, so we can early exit here:
if (firstChildDocId == docId) {
return null;
}
final Scorer childrenScorer = childWeight.scorer(context);
if (childrenScorer == null) {
return null;
}
DocIdSetIterator childrenIterator = childrenScorer.iterator();
final DocIdSetIterator it = new DocIdSetIterator() {
int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
target = Math.max(firstChildDocId, target);
if (target >= docId) {
// We're outside the child nested scope, so it is done
return doc = NO_MORE_DOCS;
} else {
int advanced = childrenIterator.advance(target);
if (advanced >= docId) {
// We're outside the child nested scope, so it is done
return doc = NO_MORE_DOCS;
} else {
return doc = advanced;
}
}
}
@Override
public long cost() {
return Math.min(childrenIterator.cost(), docId - firstChildDocId);
}
};
return new ConstantScoreScorer(this, score(), it);
}
};
}
}
}
public static final class ParentChildInnerHits extends BaseInnerHits {

View File

@ -1,98 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.fetch.subphase;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.store.Directory;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext.NestedInnerHits.NestedChildrenQuery;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.List;
import static org.hamcrest.Matchers.equalTo;
public class NestedChildrenFilterTests extends ESTestCase {
public void testNestedChildrenFilter() throws Exception {
int numParentDocs = scaledRandomIntBetween(0, 32);
int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numParentDocs; i++) {
int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
List<Document> docs = new ArrayList<>(numChildDocs + 1);
for (int j = 0; j < numChildDocs; j++) {
Document childDoc = new Document();
childDoc.add(new StringField("type", "child", Field.Store.NO));
docs.add(childDoc);
}
Document parenDoc = new Document();
parenDoc.add(new StringField("type", "parent", Field.Store.NO));
parenDoc.add(new LegacyIntField("num_child_docs", numChildDocs, Field.Store.YES));
docs.add(parenDoc);
writer.addDocuments(docs);
}
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
Query childFilter = new TermQuery(new Term("type", "child"));
int checkedParents = 0;
final Weight parentsWeight = searcher.createNormalizedWeight(new TermQuery(new Term("type", "parent")), false);
for (LeafReaderContext leaf : reader.leaves()) {
DocIdSetIterator parents = parentsWeight.scorer(leaf).iterator();
for (int parentDoc = parents.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS ; parentDoc = parents.nextDoc()) {
int expectedChildDocs = leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
hitContext.reset(null, leaf, parentDoc, searcher);
NestedChildrenQuery nestedChildrenFilter = new NestedChildrenQuery(parentFilter, childFilter, hitContext);
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
checkedParents++;
}
}
assertThat(checkedParents, equalTo(numParentDocs));
reader.close();
dir.close();
}
}